twin_h2_c2_comparison_diff_twin_match <- readRDS(file=‘~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/MaTCHComparisonAnalysis/twin_h2_c2_comparison_diff_twin_match.rds’)— title: “Aetna Twins” author: “Chirag Lakhani” date: “8/9/2018” output: html_document —
allphewas_binary_zipcode_fixed_effect <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_zipcode_fixed_effect.csv",
" ", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## X2 = col_integer(),
## X3 = col_integer(),
## X4 = col_integer(),
## X5 = col_integer(),
## X6 = col_integer(),
## X7 = col_integer(),
## X8 = col_integer(),
## X9 = col_integer(),
## X61 = col_integer(),
## X62 = col_integer(),
## X63 = col_integer(),
## X64 = col_integer(),
## X65 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary_zipcode_fixed_effect) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_zipcode_fixed_effect.rds')
allphewas_binary_zipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>%
mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se))
names(allphewas_binary_zipcode_fixed_effect) <- sub("_se", "", names(allphewas_binary_zipcode_fixed_effect))
allphewas_binary_zipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>% inner_join(phewas, ., by='phewas_code')
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
missingDFZipcode_fixed_effect <- allphewas_binary_zipcode_fixed_effect %>%
right_join(.,allphewas_binary %>% select(phewas_code), by='phewas_code') %>%
filter(is.na(phewas_description)) %>% select(phewas_code)
saveRDS(missingDFZipcode_fixed_effect, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/orchestraAnalysis/missingDFZipcode_fixed_effect.rds')
allphewas_both <- allphewas_both %>%
mutate(h2_liab.zscore=h2_liab/h2_liab_SE) %>%
mutate(c2_liab.zscore=c2_liab/c2_liab_SE) %>%
mutate(h2_liab.pvalue=2*pnorm(-abs(h2_liab.zscore))) %>%
mutate(c2_liab.pvalue=2*pnorm(-abs(c2_liab.zscore))) %>%
mutate(pvalue_h2_FDR = p.adjust(.$h2_liab.pvalue, method='BY')) %>%
mutate(pvalue_c2_FDR = p.adjust(.$c2_liab.pvalue, method='BY')) %>%
mutate(pvalue_h2_bonferroni = p.adjust(.$h2_liab.pvalue, method='bonferroni')) %>%
mutate(pvalue_c2_bonferroni = p.adjust(.$c2_liab.pvalue, method='bonferroni')) %>%
mutate(observed_h2 = -log10(h2_liab.pvalue) ) %>%
mutate(observed_c2 = -log10(c2_liab.pvalue) ) %>%
mutate(rliabss_liab.zscore=rliabss/rliabss_SE) %>%
mutate(rliabos_liab.zscore=rliabos/rliabos_SE) %>%
mutate(rliabss_liab.pvalue=2*pnorm(-abs(rliabss_liab.zscore))) %>%
mutate(rliabos_liab.pvalue=2*pnorm(-abs(rliabos_liab.zscore))) %>%
mutate(observed_rliabss = -log10(rliabss_liab.pvalue) ) %>%
mutate(observed_rliabos = -log10(rliabos_liab.pvalue) ) %>%
mutate(tot_e2=h2_liab+c2_liab) %>%
mutate(tot_e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2)) %>%
mutate(tot_e2.zscore=tot_e2/tot_e2_SE) %>%
mutate(tot_e2.pvalue=2*pnorm(-abs(tot_e2.zscore))) %>%
mutate(tot_e2.pvalue_FDR = p.adjust(tot_e2.pvalue, method='BY')) %>%
mutate(observed_tot_e2 = -log10(tot_e2.pvalue) ) %>%
mutate(e2=1-h2_liab-c2_liab) %>%
mutate(e2_SE=sqrt(h2_liab_SE^2 + c2_liab_SE^2)) %>%
mutate(e2.zscore=e2/e2_SE) %>%
mutate(e2.pvalue=2*pnorm(-abs(e2.zscore))) %>%
mutate(e2.pvalue_FDR = p.adjust(e2.pvalue, method='BY')) %>%
mutate(observed_e2 = -log10(e2.pvalue) ) %>%
mutate(beta_gender.zscore =`as.factor(Gender)M`/`as.factor(Gender)M_SE`) %>%
mutate(beta_gender.pvalue=2*pnorm(-abs(beta_gender.zscore))) %>%
mutate(pvalue_beta_gender_FDR = p.adjust(.$beta_gender.pvalue, method='BY')) %>%
mutate(beta_age.zscore =MemberBirthYear/MemberBirthYear_SE) %>%
mutate(beta_age.pvalue=2*pnorm(-abs(beta_age.zscore))) %>%
mutate(pvalue_beta_age_FDR = p.adjust(.$beta_age.pvalue, method='BY'))
allphewas_both_zipcode <- allphewas_both_zipcode %>%
mutate(h2_liab.zscore=h2_liab/h2_liab_SE) %>%
mutate(c2_liab.zscore=c2_liab/c2_liab_SE) %>%
mutate(h2_liab.pvalue=2*pnorm(-abs(h2_liab.zscore))) %>%
mutate(c2_liab.pvalue=2*pnorm(-abs(c2_liab.zscore))) %>%
mutate(pvalue_h2_FDR = p.adjust(.$h2_liab.pvalue, method='BY')) %>%
mutate(pvalue_c2_FDR = p.adjust(.$c2_liab.pvalue, method='BY')) %>%
mutate(observed_h2 = -log10(h2_liab.pvalue) ) %>%
mutate(observed_c2 = -log10(c2_liab.pvalue) ) %>%
mutate(rliabss_liab.zscore=rliabss/rliabss_SE) %>%
mutate(rliabos_liab.zscore=rliabos/rliabos_SE) %>%
mutate(rliabss_liab.pvalue=2*pnorm(-abs(rliabss_liab.zscore))) %>%
mutate(rliabos_liab.pvalue=2*pnorm(-abs(rliabos_liab.zscore))) %>%
mutate(observed_rliabss = -log10(rliabss_liab.pvalue) ) %>%
mutate(observed_rliabos = -log10(rliabos_liab.pvalue) ) %>%
mutate(tot_e2=h2_liab+c2_liab) %>%
mutate(tot_e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2)) %>%
mutate(tot_e2.zscore=tot_e2/tot_e2_SE) %>%
mutate(tot_e2.pvalue=2*pnorm(-abs(tot_e2.zscore))) %>%
mutate(tot_e2.pvalue_FDR = p.adjust(tot_e2.pvalue, method='BY')) %>%
mutate(observed_tot_e2 = -log10(tot_e2.pvalue) ) %>%
mutate(e2=1-h2_liab-c2_liab-rliabincome-rliabtemp-rliabaqi) %>%
mutate(e2_SE=sqrt(h2_liab_SE^2+c2_liab_SE^2 + rliabincome_SE^2 + rliabaqi_SE^2 + rliabtemp_SE^2 )) %>%
mutate(e2.zscore=e2/e2_SE) %>%
mutate(e2.pvalue=2*pnorm(-abs(e2.zscore))) %>%
mutate(e2.pvalue_FDR = p.adjust(e2.pvalue, method='BY')) %>%
mutate(observed_e2 = -log10(e2.pvalue) ) %>%
mutate(rliabincome.zscore=rliabincome/rliabincome_SE) %>%
mutate(rliabincome.pvalue=2*pnorm(-abs(rliabincome.zscore))) %>%
mutate(rliabincome.pvalue_FDR = p.adjust(rliabincome.pvalue, method='BY')) %>%
mutate(observed_rliabincome = -log10(rliabincome.pvalue) ) %>%
mutate(rliabaqi.zscore=rliabaqi/rliabaqi_SE) %>%
mutate(rliabaqi.pvalue=2*pnorm(-abs(rliabaqi.zscore))) %>%
mutate(rliabaqi.pvalue_FDR = p.adjust(rliabaqi.pvalue, method='BY')) %>%
mutate(observed_rliabaqi = -log10(rliabaqi.pvalue) ) %>%
mutate(rliabtemp.zscore=rliabtemp/rliabtemp_SE) %>%
mutate(rliabtemp.pvalue=2*pnorm(-abs(rliabtemp.zscore))) %>%
mutate(rliabtemp.pvalue_FDR = p.adjust(rliabtemp.pvalue, method='BY')) %>%
mutate(observed_rliabtemp = -log10(rliabtemp.pvalue) )
rm(cnt_df, cost_df, allphewas_binary, allphewas_binary_zipcode,allphewas_quant_pheno, allphewas_quant_zipcode, phewas)
allphewas_both <- allphewas_both %>% mutate(shortName=phewas_description) %>%
mutate(shortName=ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='134577','LDL Cholesterol',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='706.1','Acne',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='495','Asthma',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='637','Low Birthweight',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='464','Acute Sinusitis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='871.1','Hand Wound',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='871.3','Foot Wound',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='800.3','Tibia/Fibula Fracture',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='726.3','Bursitis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='7187','Hemoglobin',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='362.1','Retinopathy',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='313','Pervasive Development Disorder',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='COST','Cost',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='CNT','Comorbids',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='367','Low Vision',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='769','Nonallopathic Lesion',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='465','Respiratory Infections',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='870','Head Wound',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='381','Otitis Media',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='474.2','Chronic Tonsillitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='524','Dentofacial Anomalies',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='930','Food Allergy',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='939','Atopic/Contact Dermatitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='706','Sebaceous Gland',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='726','Peripheral Enthesopathies',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='859','Complication Due to Implant',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='343','Cerebral Palsy',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='316','Substance Addiction',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='555','Inflammatory Bowel Disease',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='345','Epilepsy, recurrent seizures',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='758','Chromosomal anomalies/genetic disorders',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='747','Cardiac/circulatory congenital anomalies',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='656.2','Respiratory Condition Newborn',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='656','Perinatal Condition Newborn',shortName))
allphewas_both_zipcode <- allphewas_both_zipcode %>% mutate(shortName=phewas_description) %>%
mutate(shortName=ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='134577','LDL Cholesterol',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='706.1','Acne',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='495','Asthma',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='637','Low Birthweight',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='464','Acute Sinusitis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='871.1','Hand Wound',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='871.3','Foot Wound',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='800.3','Tibia/Fibula Fracture',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='726.3','Bursitis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='696.4','Psoriasis',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='7187','Hemoglobin',shortName)) %>%
mutate(shortName=ifelse(phewas_code =='362.1','Retinopathy',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='313','Pervasive Development Disorder',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='313.1','ADHD',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='COST','Cost',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='CNT','Comorbids',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='984','Lead Poisoning',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='367','Low Vision',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='769','Nonallopathic Lesion',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='465','Respiratory Infections',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='870','Head Wound',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='381','Otitis Media',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='474.2','Chronic Tonsillitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='524','Dentofacial Anomalies',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='930','Food Allergy',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='939','Atopic/Contact Dermatitis',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='706','Sebaceous Gland',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='726','Peripheral Enthesopathies',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='859','Complication Due to Implant',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='345','Epilepsy/recurrent seizures',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='758','Chromosomal anomalies/genetic disorders',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='747','Cardiac/circulatory congenital anomalies',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='656.2','Respiratory Condition Newborn',shortName)) %>%
mutate(shortName = ifelse(phewas_code =='656','Perinatal Condition Newborn',shortName))
phewas_functional_domain_map_quant_trait <- quant_traits_subset %>%
inner_join(.,quant_pheno_description, by='phewas_code') %>%
select(phewas_code, phewas_description=Description) %>%
mutate(functional_domain = case_when(phewas_code == 'CNT' ~ 'PheWAS Comorbids',
phewas_code == 'COST' ~ 'Avg. Monthly Cost',
TRUE ~ 'Quantitative'))
phewas_functional_domain_map <- phewas_functional_domain_map %>% bind_rows(.,phewas_functional_domain_map_quant_trait)
phewas_functional_domain_map_all <- phewas_functional_domain_map %>%
select(phewas_code, phewas_description) %>% mutate(functional_domain = 'All Traits') %>% unique()
phewas_functional_domain_map <- phewas_functional_domain_map %>% bind_rows(.,phewas_functional_domain_map_all)
allphewas_both_functional_domain <- allphewas_both %>%
inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))
allphewas_both_zipcode_functional_domain <- allphewas_both_zipcode %>%
inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))
rm(phewas_functional_domain_map_quant_trait,phewas_functional_domain_map_all)
totalPheno <- nrow(allphewas_both)
totalPheno
## [1] 561
countTwin <- demographic_information %>% group_by(genderPairType) %>% tally() %>% spread(.,genderPairType, n) %>%
mutate(type = 'binary') %>% mutate(binYOB = 'All')
countTwin_birth <- demographic_information %>%
mutate(binYOB = cut(yearT1, breaks=c(1985,1996,2006,2016), dig.lab=4, include.lowest=TRUE, right=FALSE ) ) %>% group_by(binYOB,genderPairType) %>% tally() %>% spread(.,genderPairType, n) %>% ungroup()
countTwin_birth <- countTwin_birth %>% mutate(type='binary')
countTwin_all <- countTwin %>% bind_rows(.,countTwin_birth)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
countTwin_all <- countTwin_all %>% mutate(SS=FF+MM) %>% mutate(OS = MF)
YearOfBirth <- readRDS('~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/quant_raw_data/revised/quant_pheno_YearOfBirthCount.rds')
count_quant_binYOB <- YearOfBirth %>% filter(phewas_code == '7187') %>% filter(cohort == 'subset') %>% mutate(binYOB = cut(yearBirth, breaks=c(1985,1996,2006,2016),include.lowest = TRUE,dig.lab=4, right=FALSE)) %>%
group_by(binYOB, genderConfig, cohort) %>% summarise(n=sum(numPairs)) %>%ungroup() %>% select(-cohort) %>% spread(.,genderConfig,n) %>% mutate(type = 'quant')
count_quant_all <- YearOfBirth %>% filter(phewas_code == '7187') %>% filter(cohort == 'subset') %>%
group_by(genderConfig, cohort) %>% summarise(n=sum(numPairs)) %>%ungroup() %>% select(-cohort) %>% spread(.,genderConfig,n) %>% mutate(type = 'quant') %>% mutate(binYOB = 'All')
count_everything <- countTwin_all %>%
bind_rows(.,count_quant_all,count_quant_binYOB) %>%
select(-MM,-MF,-FF) %>% mutate(n=SS+OS ) %>%
mutate(pss=SS/n,pmz=1-2*(OS/n)) %>%
mutate(p=pmz/pss) %>%
mutate(var_p = (n*OS)/(SS^3)) %>% mutate(p_SE = sqrt(var_p)) %>%
mutate(p_low = p-1.96*p_SE, p_high = p+1.96*p_SE) %>% select(-pss,-pmz,-var_p)
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
count_everything <- round_df(count_everything,3)
DT::datatable(count_everything)
## # A tibble: 1 x 3
## median IQ25 IQ75
## <dbl> <dbl> <dbl>
## 1 23 12 42
## # A tibble: 3 x 4
## genderPairType median IQ25 IQ75
## <chr> <dbl> <dbl> <dbl>
## 1 FF 23 12 41
## 2 MF 24 13 44
## 3 MM 22 11 41
## # A tibble: 1 x 3
## median IQ25 IQ75
## <dbl> <dbl> <dbl>
## 1 7 3 13
## # A tibble: 3 x 4
## genderPairType median IQ25 IQ75
## <chr> <dbl> <dbl> <dbl>
## 1 FF 8 3 14
## 2 MF 7 2 12
## 3 MM 8 3 13
## # A tibble: 1 x 3
## median IQ25 IQ75
## <dbl> <dbl> <dbl>
## 1 60 45 84
## # A tibble: 3 x 4
## genderPairType median IQ25 IQ75
## <chr> <dbl> <dbl> <dbl>
## 1 FF 60 45 84
## 2 MF 60 45 84
## 3 MM 60 45 84
## # A tibble: 1 x 1
## Unique_Elements
## <int>
## 1 11666
## # A tibble: 3 x 2
## genderPairType Unique_Elements
## <chr> <int>
## 1 FF 7302
## 2 MF 7466
## 3 MM 7235
prevalence_aggregate <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/prevalence_data/prevalence_aggregate.rds')
phewas_functional_domain_map_binary_no_all <- phewas_functional_domain_map %>%
filter(functional_domain !='Quantitative') %>%
filter(functional_domain !='Avg. Monthly Cost') %>%
filter(functional_domain !='PheWAS Comorbids') %>%
filter(functional_domain !='All Traits')
prev_all <- prevalence_aggregate %>%
group_by(phewas_code, PheWAS_Indicator) %>%
summarise(n=sum(countTwin)) %>%
spread(PheWAS_Indicator, n, fill=0) %>%
mutate(prevalence = `1` / (`0` + `1`) ) %>%
right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
mutate(category = 'All')
prev_M <- prevalence_aggregate %>%
group_by(phewas_code, PheWAS_Indicator, Gender) %>%
summarise(n=sum(countTwin)) %>%
filter(Gender == 'M') %>% select(-Gender) %>%
spread(PheWAS_Indicator, n, fill=0) %>%
mutate(prevalence = `1` / (`0` + `1`) ) %>%
right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
mutate(category = 'Male')
prev_F <- prevalence_aggregate %>%
group_by(phewas_code, PheWAS_Indicator, Gender) %>%
summarise(n=sum(countTwin)) %>%
filter(Gender == 'F') %>% select(-Gender) %>%
spread(PheWAS_Indicator, n, fill=0) %>%
mutate(prevalence = `1` / (`0` + `1`) ) %>%
right_join(., phewas_functional_domain_map_binary_no_all, by='phewas_code') %>%
mutate(category = 'Female')
prev <- prev_all %>% rbind(.,prev_M) %>% rbind(., prev_F) %>% filter(!is.na(functional_domain ))
rm(prev_all, prev_F, prev_M)
#functional_domain_summary_all <- prev %>% select(phewas_code,prevalence, category) %>% unique() %>% group_by(category) %>%summarise(median = median(prevalence, na.rm = TRUE), IQ25 = quantile(prevalence, .25), IQ75 = quantile(prevalence,.75), numTraits = n_distinct(phewas_code))
#functional_domain_summary_all$functional_domain <- 'All'
#functional_domain_summary <- prev %>% group_by(functional_domain, category) %>% summarise(median = median(prevalence, na.rm = TRUE), IQ25 = quantile(prevalence, .25), IQ75 = quantile(prevalence,.75), numTraits = n_distinct(phewas_code))
#functional_domain_summary <- functional_domain_summary %>% bind_rows(.,functional_domain_summary_all)
#DT::datatable(functional_domain_summary) %>%
# formatSignif('median',2) %>%
# formatSignif('IQ25',2) %>%
# formatSignif('IQ75',2)
#DT::datatable(functional_domain_summary %>% select(functional_domain) %>% unique())
DT::datatable(prev)
plot_fig <- ggplot(prev, aes(functional_domain, prevalence)) +
geom_boxplot(aes(colour=category), outlier.alpha = 0.4) +
scale_y_log10() +
theme(axis.text.x=element_text(angle=45, size = 8),
axis.title.x = element_text(vjust=10)) +
ylab('Prevalence') + xlab('Functional Domain') +
theme(legend.position="bottom",legend.box = "horizontal")
ggsave(plot_fig, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/prevalence_functional_category.png', height=3.5, width=6)
plot_fig
map_prevalence_data <- readRDS(all,file = '~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/maps/map_prevalence_data.rds')
pp <- ggplot() +
geom_polygon(data = map_prevalence_data,
aes(x = long, y = lat, group = group, fill = numPairs),
color = "black", size = 0.25) +
coord_map() +
labs(fill = "Number of Twin Pairs") + xlim(-140,-66) + theme_map()
#pp
df_Pop <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/popDensity_twin_ACS.rds')
df_SES <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/SES_twin_ACS.rds')
popDensityPlot <- ggplot(df_Pop, aes(x=densityBin, y=normalizedPop)) +
geom_bar(stat='identity', aes(fill = cohort)) +
facet_wrap(~cohort,nrow = 1) +
theme(axis.text.x = element_text(angle = 60, size=1),
axis.title.x = element_text(size=8),
axis.title.y = element_text(size=8)) +
xlab("Log of Population Per Square Mile Area") +
ylab("% Total Population")
sesDensityPlot <- ggplot(df_SES, aes(x=SES_bin, y=normalizedPop)) +
geom_bar(stat='identity', aes(fill = cohort)) +
facet_wrap(~cohort,nrow = 1) +
theme(axis.text.x = element_text(angle = 60, size=1),
axis.title.x = element_text(size=8),
axis.title.y = element_text(size=8)) +
xlab("Depravity Index Score") + ylab("% Total Population")
figure1a <- ggarrange(popDensityPlot,sesDensityPlot,labels = c( 'b)','c)'),
common.legend=TRUE, ncol = 2, legend = 'bottom',
font.label = list(size = 10))
figure1 <- ggarrange(pp,figure1a,labels = c( "a)", ''), common.legend=FALSE, nrow = 2, ncol = 1,legend = 'top',
font.label = list(size = 10))
#figure1
ggsave(figure1,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1.png')
## Saving 7 x 5 in image
PCLoadings_500Cities <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/PCLoadings_500Cities.rds')
cor_fit_deprav_income <- cor.test(-PCLoadings_500Cities$PC1,PCLoadings_500Cities$medianincome)
cor_fit_deprav_income
##
## Pearson's product-moment correlation
##
## data: -PCLoadings_500Cities$PC1 and PCLoadings_500Cities$medianincome
## t = 269.61, df = 26565, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8525273 0.8589647
## sample estimates:
## cor
## 0.8557791
all_traits_df <- data.frame(functional_domain = 'All Traits',
r_MZ = 0.636,
r_MZ_SE = 0.002,
r_DZ=0.339,
r_DZ_SE = 0.003,
h2 = 0.488,
h2_SE = 0.005,
c2 = 0.174,
c2_SE = 0.004,
h2_corr = 0.593,
h2_corr_SE = 0.008,
c2_corr = 0.042,
c2_corr_SE = 0.007,
r_MZ_nstudies = NA ,
r_MZ_ntraits = 9568,
r_DZ_nstudies = NA,
r_DZ_ntraits = 5220,
h2_nstudies = NA,
h2_ntraits = 2929,
c2_nstudies = NA,
c2_ntraits = 2771)
functional_domain_df <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/match_phewas_mapping/functional_domain/functional_domain.rds')
functional_domain_df <- functional_domain_df %>% rbind(.,all_traits_df)
functional_domain_df <- functional_domain_df %>%
filter(functional_domain != 'Socialinteractions') %>%
filter(functional_domain != 'Socialvalues') %>%
filter(functional_domain != 'Cell') %>%
filter(functional_domain != 'Activities') %>%
filter(functional_domain != 'Nutritional') %>%
filter(functional_domain != 'Mortality') %>%
filter(functional_domain != 'Muscular')
zz <- allphewas_both_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=h2_liab,sei=h2_liab_SE,data=., method='DL'))
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b_h2 <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, h2_liab,1:ncol(.))
phewas_se_h2 <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, h2_liab_SE,1:ncol(.))
phewas_meta_h2_liab_all <- phewas_b_h2 %>% inner_join(.,phewas_se_h2, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_h2_liab_all) <- c('functional_domain','h2','h2_SE', 'category')
MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')
names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')
metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2)
metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )
DT::datatable(round_df(metaAll_justMatch_h2, digits=3))
rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)
zz <- allphewas_both_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=c2_liab,sei=c2_liab_SE,data=., method='DL'))
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, c2_liab,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, c2_liab_SE,1:ncol(.))
phewas_meta_c2_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_c2_liab_all) <- c('functional_domain','c2','c2_SE', 'category')
MaTCH <- functional_domain_df %>% select(functional_domain, c2_corr, c2_corr_SE) %>% mutate(category = 'MaTCH')
names(MaTCH) <- c('functional_domain','c2','c2_SE', 'category')
metaAll_justMatch_c2 <- phewas_meta_c2_liab_all %>% rbind(.,MaTCH)
metaAll_justMatch_c2 <- metaAll_justMatch_c2 %>%
mutate(c2_low = c2-1.96*c2_SE, c2_high = c2+1.96*c2_SE )
DT::datatable(round_df(metaAll_justMatch_c2, digits=3))
rm(zz, phewas_b, phewas_se, MaTCH)
zz <- allphewas_both_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=e2,sei=e2_SE,data=., method='DL'))
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, e2,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, e2_SE,1:ncol(.))
phewas_meta_e2_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_e2_liab_all) <- c('functional_domain','e2','e2_SE', 'category')
rm(zz, phewas_b, phewas_se)
zz <- allphewas_both_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabss,sei=rliabss_SE,data=., method='DL'))
## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabss, sei = rliabss_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabss,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabss_SE,1:ncol(.))
phewas_meta_rss_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_rss_liab_all) <- c('functional_domain','rSS','rSS_SE', 'category')
output_rliabSS <- phewas_meta_rss_liab_all %>%
mutate(rSS_low = rSS-1.96*rSS_SE, rSS_high = rSS+1.96*rSS_SE )
DT::datatable(round_df(output_rliabSS, digits=3))
zz <- allphewas_both_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>% filter(rliabos_SE > .00000001) %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabos,sei=rliabos_SE,data=., method='DL'))
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabos,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabos_SE,1:ncol(.))
phewas_meta_ros_liab_all <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_ros_liab_all) <- c('functional_domain','rOS','rOS_SE', 'category')
output_rliabOS <- phewas_meta_ros_liab_all %>%
mutate(rOS_low = rOS-1.96*rOS_SE, rOS_high = rOS+1.96*rOS_SE )
DT::datatable(round_df(output_rliabOS, digits=3))
metaAll_justMatch_h2$functional_domain <- factor(metaAll_justMatch_h2$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal',
'Quantitative',
'Avg. Monthly Cost',
'PheWAS Comorbids'))
metaAll_justMatch_c2$functional_domain <- factor(metaAll_justMatch_c2$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal',
'Quantitative',
'Avg. Monthly Cost',
'PheWAS Comorbids'))
h2_liab_ggplot <- ggplot(metaAll_justMatch_h2, aes(y=h2, x=functional_domain, colour=category)) +
geom_point(position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin=h2-1.96*h2_SE, ymax=h2+1.96*h2_SE),position = position_dodge(width = 0.5)) +
coord_flip() +
theme(axis.text=element_text(size=8)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
theme(axis.title = element_text()) + ylab('Heritability (h2)') + xlab('Functional Domain')
ggsave(h2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
c2_liab_ggplot <- ggplot(metaAll_justMatch_c2, aes(y=c2, x=functional_domain, colour=category)) +
geom_point(position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin=c2-1.96*c2_SE, ymax=c2+1.96*c2_SE),position = position_dodge(width = 0.5)) +
coord_flip() +
theme(axis.text=element_text(size=8)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
theme(axis.title = element_text()) + ylab('Shared Environmental Variance (c2)') + xlab('Functional Domain')
ggsave(c2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/c2_liab_comparison_match.png', height=3.5, width=6)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
h2_c2_MaTCH <- ggarrange(h2_liab_ggplot,c2_liab_ggplot,labels = c( "a)", 'b)'), nrow = 2, ncol = 1, common.legend=TRUE,legend='bottom' )
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#h2_c2_MaTCH
ggsave(h2_c2_MaTCH,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_c2_MaTCH.png', height = 10, width=8)
phewas_meta_h2_liab_all$stat <- 'h2'
phewas_meta_c2_liab_all$stat <- 'c2'
#phewas_meta_e2_liab_all$stat <- 'e2'
phewas_meta_rss_liab_all$stat <- 'rOS'
phewas_meta_ros_liab_all$stat <- 'rSS'
a <- phewas_meta_h2_liab_all %>% select(functional_domain, statistic = h2, stat)
b <- phewas_meta_c2_liab_all %>% select(functional_domain, statistic = c2, stat)
#c <- phewas_meta_e2_liab_all %>% select(functional_domain, statistic = e2, stat)
d <- phewas_meta_ros_liab_all %>% select(functional_domain, statistic = rOS, stat)
e <- phewas_meta_rss_liab_all %>% select(functional_domain, statistic = rSS, stat)
z_meta_all <- a %>% bind_rows(.,b,d,e)
rm(a,b,d,e)
z_meta_all$stat <- factor(z_meta_all$stat, levels=c('rOS','rSS', 'h2','c2'))
z_meta_all$functional_domain <- factor(z_meta_all$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal',
'Quantitative',
'Avg. Monthly Cost',
'PheWAS Comorbids'))
barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
theme(axis.text.x=element_text(angle=45), text=element_text(size=8) ) +
theme(axis.title = element_text()) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic')
barchart_fn_domain
ggsave(barchart_fn_domain, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain.png')
## Saving 7 x 5 in image
zz <- allphewas_both_zipcode_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=h2_liab,sei=h2_liab_SE,data=., method='DL'))
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = h2_liab, sei = h2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, h2_liab,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, h2_liab_SE,1:ncol(.))
environment_meta_h2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(environment_meta_h2_liab_all_environment) <- c('functional_domain','h2','h2_SE', 'category')
environment_meta_h2_liab_all_environment <- environment_meta_h2_liab_all_environment %>%
mutate(h2_low=h2-1.96*h2_SE,h2_high=h2+1.96*h2_SE )
DT::datatable(round_df(environment_meta_h2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
zz <- allphewas_both_zipcode_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=c2_liab,sei=c2_liab_SE,data=., method='DL'))
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
## Warning in rma(yi = c2_liab, sei = c2_liab_SE, data = ., method = "DL"):
## Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, c2_liab,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, c2_liab_SE,1:ncol(.))
environment_meta_c2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(environment_meta_c2_liab_all_environment) <- c('functional_domain','c2','c2_SE', 'category')
environment_meta_c2_liab_all_environment <- environment_meta_c2_liab_all_environment %>% mutate(c2_low=c2-1.96*c2_SE,c2_high=c2+1.96*c2_SE )
DT::datatable(round_df(environment_meta_c2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
zz <- allphewas_both_zipcode_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=e2,sei=e2_SE,data=., method='DL'))
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
## Warning in rma(yi = e2, sei = e2_SE, data = ., method = "DL"): Studies with
## NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, e2,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, e2_SE,1:ncol(.))
environment_meta_e2_liab_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(environment_meta_e2_liab_all_environment) <- c('functional_domain','e2','e2_SE', 'category')
environment_meta_e2_liab_all_environment <- environment_meta_e2_liab_all_environment %>%
mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE )
DT::datatable(round_df(environment_meta_e2_liab_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
zz <- allphewas_both_zipcode_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabincome,sei=rliabincome_SE,data=., method='DL'))
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabincome, sei = rliabincome_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabincome,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabincome_SE,1:ncol(.))
environment_meta_rliabincome_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(environment_meta_rliabincome_all_environment) <- c('functional_domain','var_income','var_income_SE', 'category')
environment_meta_rliabincome_all_environment <- environment_meta_rliabincome_all_environment %>%
mutate(var_income_low=var_income-1.96*var_income_SE,var_income_high=var_income+1.96*var_income_SE )
DT::datatable(round_df(environment_meta_rliabincome_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
zz <- allphewas_both_zipcode_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabaqi,sei=rliabaqi_SE,data=., method='DL'))
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabaqi,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabaqi_SE,1:ncol(.))
environment_meta_rliabaqi_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(environment_meta_rliabaqi_all_environment) <- c('functional_domain','var_aqi','var_aqi_SE', 'category')
environment_meta_rliabaqi_all_environment <- environment_meta_rliabaqi_all_environment %>%
mutate(var_aqi_low=var_aqi-1.96*var_aqi_SE, var_aqi_high=var_aqi+1.96*var_aqi_SE )
DT::datatable(round_df(environment_meta_rliabaqi_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
zz <- allphewas_both_zipcode_functional_domain %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabtemp,sei=rliabtemp_SE,data=., method='DL'))
## Warning in rma(yi = rliabtemp, sei = rliabtemp_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
## Warning in rma(yi = rliabtemp, sei = rliabtemp_SE, data = ., method =
## "DL"): Studies with NAs omitted from model fitting.
phewas_b <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabtemp,1:ncol(.))
phewas_se <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabtemp_SE,1:ncol(.))
environment_meta_rliabtemp_all_environment <- phewas_b %>% inner_join(.,phewas_se, by='functional_domain') %>%
mutate(category = 'Insurance Claims')
names(environment_meta_rliabtemp_all_environment) <- c('functional_domain','var_temp','var_temp_SE', 'category')
environment_meta_rliabtemp_all_environment <- environment_meta_rliabtemp_all_environment %>%
mutate(var_temp_low=var_temp-1.96*var_temp_SE, var_temp_high=var_temp+1.96*var_temp_SE )
DT::datatable(round_df(environment_meta_rliabtemp_all_environment, digits=3))
rm(zz,phewas_b, phewas_se)
DT::datatable(allphewas_both_zipcode %>% filter(rliabincome.pvalue_FDR <= 0.05) %>% summarise(n=n()) )
DT::datatable(allphewas_both_zipcode %>% filter(rliabaqi.pvalue_FDR <= 0.05) %>% summarise(n=n()) )
DT::datatable(allphewas_both_zipcode %>% filter(rliabtemp.pvalue_FDR <= 0.05) %>% summarise(n=n()) )
var_environment_confidence_interval <- allphewas_both_zipcode %>%
select(phewas_code, phewas_description, rliabincome, rliabincome_SE, rliabaqi, rliabaqi_SE, rliabtemp, rliabtemp_SE) %>%
mutate(rliabincome_low=rliabincome-1.96*rliabincome_SE, rliabincome_high=rliabincome+1.96*rliabincome_SE) %>%
mutate(rliabaqi_low=rliabaqi-1.96*rliabaqi_SE, rliabaqi_high=rliabaqi+1.96*rliabaqi_SE) %>%
mutate(rliabtemp_low=rliabtemp-1.96*rliabtemp_SE, rliabtemp_high=rliabtemp+1.96*rliabtemp_SE) %>%
select(phewas_code, phewas_description, rliabincome,rliabincome_low,rliabincome_high,rliabaqi,rliabaqi_low,rliabaqi_high,rliabtemp,rliabtemp_low,rliabtemp_high)
DT::datatable(round_df(var_environment_confidence_interval,digits = 3))
environment_meta_h2_liab_all_environment$stat <- 'h2'
environment_meta_c2_liab_all_environment$stat <- 'c2'
#environment_meta_e2_liab_all_environment$stat <- 'e2'
environment_meta_rliabincome_all_environment$stat <- 'SES'
environment_meta_rliabaqi_all_environment$stat <- 'AQI'
environment_meta_rliabtemp_all_environment$stat <- 'temperature'
a <- environment_meta_h2_liab_all_environment %>% select(functional_domain, statistic = h2,se=h2_SE, stat)
b <- environment_meta_c2_liab_all_environment %>% select(functional_domain, statistic = c2, se=c2_SE, stat)
#c <- environment_meta_e2_liab_all_environment %>% select(functional_domain, statistic = e2, stat)
d <- environment_meta_rliabincome_all_environment %>% select(functional_domain, statistic = var_income,se=var_income_SE, stat)
e <- environment_meta_rliabaqi_all_environment %>% select(functional_domain, statistic = var_aqi,se=var_aqi_SE, stat)
f <- environment_meta_rliabtemp_all_environment %>% select(functional_domain, statistic = var_temp,se=var_temp_SE, stat)
z_meta_all <- a %>% bind_rows(.,b,d,e,f)
rm(a,b,d,e,f)
z_meta_all$stat <- factor(z_meta_all$stat, levels=c('h2','c2','SES','AQI','temperature'))
z_meta_all$functional_domain <- factor(z_meta_all$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal',
'Quantitative',
'Avg. Monthly Cost',
'PheWAS Comorbids'))
size_text <- 4
barchart_fn_domain_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
geom_bar(stat="identity", position="dodge", width = 0.75, colour="black", size=.2) +
theme(legend.position=c(.25,.75),legend.direction = "horizontal") +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(legend.key.size = unit(.2, "cm")) +
geom_errorbar(aes(ymin=statistic-1.96*se,
ymax=statistic+1.96*se),
width=.5,size=.2,
position=position_dodge(.9))
barchart_fn_domain_environment
#ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain_environment.png')
braden_df_meta_analytic_estimates_environment <- z_meta_all
ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_5.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
environment_meta_rliabincome_all_environment$stat <- 'SES'
environment_meta_rliabaqi_all_environment$stat <- 'AQI'
environment_meta_rliabtemp_all_environment$stat <- 'temperature'
d <- environment_meta_rliabincome_all_environment %>% select(functional_domain, statistic = var_income, se=var_income_SE, stat)
e <- environment_meta_rliabaqi_all_environment %>% select(functional_domain, statistic = var_aqi, se=var_aqi_SE, stat)
f <- environment_meta_rliabtemp_all_environment %>% select(functional_domain, statistic = var_temp,se=var_temp_SE, stat)
z_meta_all <- d %>% bind_rows(.,e,f)
rm(d,e,f)
z_meta_all$stat <- factor(z_meta_all$stat, levels=c('SES','AQI','temperature'))
z_meta_all$functional_domain <- factor(z_meta_all$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal',
'Quantitative',
'Avg. Monthly Cost',
'PheWAS Comorbids'))
#barchart_fn_domain_just_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
# geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
# theme(axis.text.x=element_text(angle=45, size = 8),
# axis.title.x = element_text(vjust=10)) +
#ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
# theme(legend.position="bottom")
#barchart_fn_domain_just_environment
#ggsave(barchart_fn_domain_just_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain_just_environment.png')
size_text <- 4
barchart_fn_domain_just_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
geom_bar(stat="identity", position="dodge", width = 0.75, colour="black", size=.2) +
theme(legend.position=c(.3,.75),legend.direction = "vertical") +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(legend.key.size = unit(.2, "cm")) +
geom_errorbar(aes(ymin=statistic-1.96*se,
ymax=statistic+1.96*se),
width=.5,size=.2,
position=position_dodge(.9))
barchart_fn_domain_just_environment
ggsave(barchart_fn_domain_just_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_11.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
h2_meta <- metaAll_justMatch_h2 %>% filter(category == 'Insurance Claims') %>%
mutate(h2_low = h2 - 1.96*h2_SE, h2_high= h2+ 1.96*h2_SE) %>% ungroup()
h2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(pvalue_h2_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()
h2_meta_fdr <- h2_meta %>% inner_join(.,h2_fdr, by='functional_domain')
## Warning: Column `functional_domain` joining factor and character vector,
## coercing into character vector
h2_meta_fdr <- round_df(h2_meta_fdr, digits=3)
DT::datatable(h2_meta_fdr)
c2_meta <- metaAll_justMatch_c2 %>% filter(category == 'Insurance Claims') %>%
mutate(c2_low = c2 - 1.96*c2_SE, c2_high= c2+ 1.96*c2_SE) %>% ungroup()
c2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(pvalue_c2_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()
c2_meta_fdr <- c2_meta %>% inner_join(.,c2_fdr, by='functional_domain')
## Warning: Column `functional_domain` joining factor and character vector,
## coercing into character vector
c2_meta_fdr <- round_df(c2_meta_fdr, digits=3)
DT::datatable(c2_meta_fdr)
e2_meta <- phewas_meta_e2_liab_all %>% filter(category == 'Insurance Claims') %>%
mutate(e2_low = e2 - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()
e2_fdr <- allphewas_both_functional_domain %>% group_by(functional_domain) %>% summarise(count_n=n(), fdr_threshold=sum(e2.pvalue_FDR <= 0.05)) %>% mutate(pct_FDR = (fdr_threshold/count_n)*100.0) %>% ungroup()
e2_meta_fdr <- e2_meta %>% inner_join(.,e2_fdr, by='functional_domain')
e2_meta_fdr <- round_df(e2_meta_fdr, digits=3)
DT::datatable(e2_meta_fdr)
#environment_meta_rliabincome_all_environment
#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
# mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()
SES_fdr <- allphewas_both_zipcode_functional_domain %>%
filter(!is.na(rliabincome.pvalue_FDR)) %>%
group_by(functional_domain) %>%
summarise(count_n=n(),
fdr_threshold=sum(rliabincome.pvalue_FDR <= 0.05)) %>% ungroup() %>%
mutate(pctThreshold=(fdr_threshold/561)*100)
SES_fdr <- environment_meta_rliabincome_all_environment %>% full_join(.,SES_fdr, by='functional_domain')
SES_fdr <- round_df(SES_fdr, digits=3)
DT::datatable(SES_fdr)
#environment_meta_rliabincome_all_environment
#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
# mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()
AQI_fdr <- allphewas_both_zipcode_functional_domain %>%
filter(!is.na(rliabaqi.pvalue_FDR)) %>%
group_by(functional_domain) %>%
summarise(count_n=n(),
fdr_threshold=sum(rliabaqi.pvalue_FDR <= 0.05)) %>% ungroup() %>%
mutate(pctThreshold=(fdr_threshold/561)*100)
AQI_fdr <- environment_meta_rliabaqi_all_environment %>% full_join(.,AQI_fdr, by='functional_domain')
AQI_fdr <- round_df(AQI_fdr, digits=3)
DT::datatable(AQI_fdr)
#environment_meta_rliabincome_all_environment
#e2_meta <- environment_meta_rliabincome_all_environment %>% filter(category == 'Insurance Claims') %>%
# mutate(var_income_low = var_income - 1.96*e2_SE, e2_high= e2+ 1.96*e2_SE) %>% ungroup()
temp_fdr <- allphewas_both_zipcode_functional_domain %>%
filter(!is.na(rliabtemp.pvalue_FDR)) %>%
group_by(functional_domain) %>%
summarise(count_n=n(),
fdr_threshold=sum(rliabtemp.pvalue_FDR <= 0.05)) %>% ungroup() %>%
mutate(pctThreshold=(fdr_threshold/561)*100)
temp_fdr <- environment_meta_rliabtemp_all_environment %>% full_join(.,temp_fdr, by='functional_domain')
temp_fdr <- round_df(temp_fdr, digits=3)
DT::datatable(temp_fdr)
phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)
allphewas_binary_zipcode_old <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary_zipcode_old.csv",
" ", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## X2 = col_integer(),
## X3 = col_integer(),
## X4 = col_integer(),
## X5 = col_integer(),
## X6 = col_integer(),
## X7 = col_integer(),
## X8 = col_integer(),
## X9 = col_integer(),
## X75 = col_integer(),
## X76 = col_integer(),
## X77 = col_integer(),
## X78 = col_integer(),
## X79 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary_zipcode_old) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary_zipcode_old.rds')
allphewas_binary_zipcode_old <- allphewas_binary_zipcode_old %>%
mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se))
names(allphewas_binary_zipcode_old) <- sub("_se", "", names(allphewas_binary_zipcode_old))
allphewas_binary_zipcode_old <- allphewas_binary_zipcode_old %>% inner_join(phewas, ., by='phewas_code')
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
comparison_var_comp_old <- allphewas_binary_zipcode_old %>%
select(phewas_code, phewas_description,
rliabincome_old =rliabincome, rliabincome_SE_old=rliabincome_SE,
rliabaqi_old=rliabaqi, rliabaqi_SE_old=rliabaqi_SE,
rliabtemp_old=rliabtemp, rliabtemp_SE_old=rliabtemp_SE)
comparison_var_comp_new <- allphewas_both_zipcode %>%
select(phewas_code,
rliabincome, rliabincome_SE,
rliabaqi, rliabaqi_SE,
rliabtemp, rliabtemp_SE)
comparison_var_comp_both <- comparison_var_comp_old %>%
inner_join(.,comparison_var_comp_new, by='phewas_code') %>%
mutate(diff_income = rliabincome - rliabincome_old, diff_income_SE = sqrt(rliabincome_SE^2+rliabincome_SE_old^2)) %>%
mutate(diff_aqi = rliabaqi - rliabaqi_old, diff_aqi_SE = sqrt(rliabaqi_SE^2+rliabaqi_SE_old^2)) %>%
mutate(diff_temp = rliabtemp - rliabtemp_old, diff_temp_SE = sqrt(rliabtemp_SE^2+rliabtemp_SE_old^2)) %>%
mutate(diff_income.zscore=diff_income/diff_income_SE) %>%
mutate(diff_income.pvalue=2*pnorm(-abs(diff_income.zscore))) %>%
mutate(diff_income.pvalue_FDR = p.adjust(diff_income.pvalue, method='BY')) %>%
mutate(diff_aqi.zscore=diff_aqi/diff_aqi_SE) %>%
mutate(diff_aqi.pvalue=2*pnorm(-abs(diff_aqi.zscore))) %>%
mutate(diff_aqi.pvalue_FDR = p.adjust(diff_aqi.pvalue, method='BY')) %>%
mutate(diff_temp.zscore=diff_temp/diff_temp_SE) %>%
mutate(diff_temp.pvalue=2*pnorm(-abs(diff_temp.zscore))) %>%
mutate(diff_temp.pvalue_FDR = p.adjust(diff_temp.pvalue, method='BY'))
ggplot(comparison_var_comp_both , aes(rliabincome_old,rliabincome)) + geom_point() +
geom_abline(slope = 1, intercept = 0)
ggplot(comparison_var_comp_both , aes(rliabaqi_old,rliabaqi)) + geom_point() +
geom_abline(slope = 1, intercept = 0)
ggplot(comparison_var_comp_both , aes(rliabtemp_old,rliabtemp)) + geom_point() +
geom_abline(slope = 1, intercept = 0)
## Income FDR and pi_0
qval_income <- qvalue(comparison_var_comp_both$diff_income.pvalue)
summary(qval_income)
##
## Call:
## qvalue(p = comparison_var_comp_both$diff_income.pvalue)
##
## pi0: 1
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 4 12 23 31 40 59 500
## q-value 1 1 4 8 13 18 89
## local FDR 1 1 2 4 5 9 33
comparison_var_comp_both %>% filter(diff_income.pvalue_FDR <= 0.05) %>% tally()
## n
## 1 3
comparison_var_comp_both %>% filter(diff_income.pvalue_FDR <= 0.05) %>% select(phewas_description)
## phewas_description
## 1 Otitis media and Eustachian tube disorders
## 2 Otitis media
## 3 Acute sinusitis
comparison_var_comp_both %>% filter(diff_income < 0) %>% tally()
## n
## 1 260
comparison_var_comp_both %>% filter(diff_income > 0) %>% tally()
## n
## 1 243
comparison_var_comp_both %>% filter(diff_income == 0) %>% tally()
## n
## 1 49
income_rank_var <- comparison_var_comp_both %>%
select(phewas_code, phewas_description, rliabincome_old, rliabincome, rliabincome_SE, rliabincome_SE_old) %>%
mutate(oldRank=rank(-rliabincome_old), newRank=rank(-rliabincome)) %>%
mutate(rliabincome_old_low=rliabincome_old-1.96*rliabincome_SE_old,
rliabincome_old_high=rliabincome_old+1.96*rliabincome_SE_old,
rliabincome_low=rliabincome-1.96*rliabincome_SE,
rliabincome_high=rliabincome+1.96*rliabincome_SE)
DT::datatable(round_df(income_rank_var, digits = 3))
qval_AQI <- qvalue(comparison_var_comp_both$diff_aqi.pvalue)
summary(qval_AQI)
##
## Call:
## qvalue(p = comparison_var_comp_both$diff_aqi.pvalue)
##
## pi0: 1
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 5 9 15 23 25 44 479
## q-value 1 2 6 9 9 14 25
## local FDR 0 1 2 3 8 8 15
comparison_var_comp_both %>% filter(diff_aqi.pvalue_FDR <= 0.05) %>% tally()
## n
## 1 2
comparison_var_comp_both %>% filter(diff_aqi.pvalue_FDR <= 0.05) %>% select(phewas_description)
## phewas_description
## 1 Acute sinusitis
## 2 Allergic rhinitis
comparison_var_comp_both %>% filter(diff_aqi < 0) %>% tally()
## n
## 1 205
comparison_var_comp_both %>% filter(diff_aqi > 0) %>% tally()
## n
## 1 274
comparison_var_comp_both %>% filter(diff_aqi == 0) %>% tally()
## n
## 1 73
aqi_rank_var <- comparison_var_comp_both %>%
select(phewas_code, phewas_description, rliabaqi_old, rliabaqi, rliabaqi_SE, rliabaqi_SE_old) %>%
mutate(oldRank=rank(-rliabaqi_old), newRank=rank(-rliabaqi)) %>%
mutate(rliabaqi_old_low=rliabaqi_old-1.96*rliabaqi_SE_old,
rliabaqi_old_high=rliabaqi_old+1.96*rliabaqi_SE_old,
rliabaqi_low=rliabaqi-1.96*rliabaqi_SE,
rliabaqi_high=rliabaqi+1.96*rliabaqi_SE)
DT::datatable(round_df(aqi_rank_var, digits = 3))
qval_temp <- qvalue(comparison_var_comp_both$diff_temp.pvalue)
summary(qval_temp)
##
## Call:
## qvalue(p = comparison_var_comp_both$diff_temp.pvalue)
##
## pi0: 1
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 0 0 2 6 7 14 496
## q-value 0 0 0 0 0 0 0
## local FDR 0 0 0 0 0 0 0
comparison_var_comp_both %>% filter(diff_temp.pvalue_FDR <= 0.05) %>% tally()
## n
## 1 0
comparison_var_comp_both %>% filter(diff_temp < 0) %>% tally()
## n
## 1 257
comparison_var_comp_both %>% filter(diff_temp > 0) %>% tally()
## n
## 1 240
comparison_var_comp_both %>% filter(diff_temp == 0) %>% tally()
## n
## 1 55
temp_rank_var <- comparison_var_comp_both %>%
select(phewas_code, phewas_description, rliabtemp_old, rliabtemp, rliabtemp_SE, rliabtemp_SE_old) %>%
mutate(oldRank=rank(-rliabtemp_old), newRank=rank(-rliabtemp)) %>%
mutate(rliabtemp_old_low=rliabtemp_old-1.96*rliabtemp_SE_old,
rliabtemp_old_high=rliabtemp_old+1.96*rliabtemp_SE_old,
rliabtemp_low=rliabtemp-1.96*rliabtemp_SE,
rliabtemp_high=rliabtemp+1.96*rliabtemp_SE)
DT::datatable(round_df(temp_rank_var, digits = 3))
############### Subfunction ############################
## Compute the estimate of pi0 for a fixed value of B ##
########################################################
FixedB <- function(p, B)
{
## Input:
#p: a vector of p-values
#B: an integer, the interval [0,1] is divided into B equal-length intervals
## Output:
#pi0: an estimate of the proportion of true null hypotheses
m <- length(p)
t <- seq(0,1,length=B+1) # equally spaced points in the interval [0,1]
NB <- rep(0,B) # number of p-values greater than t_i
NBaverage <- rep(0,B) # average number of p-values in each of the (B-i+1) small intervals on [t_i,1]
NS <- rep(0,B) # number of p-values in the interval [t_i, t_(i+1)]
pi <- rep(0,B) # estimates of pi0
for(i in 1:B)
{
NB[i] <- length(p[p>=t[i]])
NBaverage[i] <- NB[i]/(B-(i-1))
NS[i] <- length(p[p>=t[i]]) - length(p[p>=t[i+1]])
pi[i] <- NB[i]/(1-t[i])/m
}
i <- min(which(NS <= NBaverage)) # Find change point i
pi0 <- min(1, mean(pi[(i-1):B])) # average estiamte of pi0
return(pi0)
}
############## main function ###########################
## (1) Compute the estimate of pi0 ##
## (2) Apply the adaptive FDR controlling procedure ##
########################################################
AverageEstimate <- function(p=NULL, Bvector=c(5, 10, 20, 50, 100), alpha=0.05)
{
## Input:
#p: a vector of p-values
#Bvector: a vector of integer values where the interval [0,1] is divided into B equal-length intervals
# When Bvector is an integer, number of intervals is consider as fixed. For example Bvector = 10;
# When Bvector is a vector, bootstrap method is used to choose an optimal value of B
#alpha: FDR signficance level so that the FDR is controlled below alpha
#Numboot: number of bootstrap samples
## Output:
#pi0: an estimate of the proportion of true null hypotheses
#significant: a vector of indicator variables;
# it is 1 if the corresponding p-value is significant
# it is 0 if the corresponding p-value is not significant
# check if the p-values are valid
if (min(p)<0 || max(p)>1) print("Error: p-values are not in the interval of [0,1]")
m <- length(p) # Total number p-values
Bvector <- as.integer(Bvector) # Make sure Bvector is a vector of integers
#Bvector has to be bigger than 1
if(min(Bvector) <=1) print ("Error: B has to be bigger than 1")
######## Estimate pi0 ########
if(length(Bvector) == 1) # fixed number of numbers, i.e., B is fixed
{
pi0 <- AverageEstimateFixedB(p, Bvector)
}
else
{
Numboot <- 100
OrigPi0Est <- rep(0, length(Bvector))
for (Bloop in 1:length(Bvector))
{
OrigPi0Est[Bloop] <- FixedB(p, Bvector[Bloop])
}
BootResult <- matrix(0, nrow=Numboot, ncol=length(Bvector)) # Contains the bootstrap results
for(k in 1:Numboot)
{
p.boot <- sample(p, m, replace=TRUE) # bootstrap sample
for (Bloop in 1:length(Bvector))
{
BootResult[k,Bloop] <- FixedB(p.boot, Bvector[Bloop])
}
}
MeanPi0Est <- mean(OrigPi0Est) # avearge of pi0 estimates over the range of Bvector
MSEestimate <- rep(0, length(Bvector)) # compute mean-squared error
for (i in 1:length(Bvector))
{
MSEestimate[i] <- (OrigPi0Est[i]- MeanPi0Est)^2
for (k in 1:Numboot)
{
MSEestimate[i] <- MSEestimate[i]+1/Numboot*(BootResult[k,i] - OrigPi0Est[i])^2
}
}
pi0 <- OrigPi0Est[MSEestimate==min(MSEestimate)]
} # end of else
######## Apply the adaptive FDR controlling procedure ########
sorted.p <- sort(p) # sorted p-values
order.p <- order(p) # order of the p-values
m0 <- pi0*m # estimate of the number of true null
i <- m
crit <- i/m0*alpha
while (sorted.p[i] > crit )
{
i <- i-1
crit <- i/m0*alpha
if (i==1) break
}
K <- i
if (K==1 & sorted.p[K] <= 1/m0*alpha) K <- 1
if (K==1 & sorted.p[K] > 1/m0*alpha) K <- 0
significant <- rep(0,m) # indicator of significance of the p-values
if (K > 0) significant[order.p[1:K]] <- 1
result <- list(pi0=pi0, significant=significant)
result
}
DT::datatable(allphewas_both %>% filter(c2_liab.pvalue <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(c2_liab.pvalue > 0.05) %>% tally())
m_sig <- allphewas_both_functional_domain %>% filter(c2_liab.pvalue <= 0.05) %>% group_by(functional_domain) %>% tally() %>% mutate(sig_num=n) %>% select(-n)
m_non_sig <- allphewas_both_functional_domain %>% filter(c2_liab.pvalue > 0.05) %>% group_by(functional_domain) %>% tally() %>% mutate(insig_num=n) %>% select(-n)
m_both <- m_non_sig %>% left_join(., m_sig, by='functional_domain')
m_both[is.na(m_both)] <- 0
m_both$pct_additive <-( m_both$insig_num / (m_both$sig_num + m_both$insig_num) ) * 100
m_both$total <-( (m_both$sig_num + m_both$insig_num) )
DT::datatable(round_df(m_both, digits=3))
dataPvalue <- allphewas_both %>% select(c2_liab.pvalue) %>% filter(complete.cases(.))
library(qvalue)
qobj <- qvalue(dataPvalue$c2_liab.pvalue)
summary(qobj)
##
## Call:
## qvalue(p = dataPvalue$c2_liab.pvalue)
##
## pi0: 0.5260485
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 140 163 207 226 258 285 559
## q-value 133 154 194 220 253 285 559
## local FDR 116 131 150 170 183 200 366
AverageEstimate(dataPvalue$c2_liab.pvalue )
## $pi0
## [1] 0.4822072
##
## $significant
## [1] 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0
## [36] 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 1 1 1 1 0 0 0
## [71] 0 0 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0
## [106] 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0
## [141] 0 0 1 0 0 0 1 1 1 0 0 1 1 0 0 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## [176] 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0 1 0 1 0 1 1 1 0 1 1 1 0 0 0 1 1 1 1 1 1
## [211] 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0
## [246] 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0 0 0 0
## [281] 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0
## [316] 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## [351] 1 1 1 0 1 1 1 1 0 0 0 0 0 0 1 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0
## [386] 0 1 1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 1 1 1 0 1 1 1
## [421] 1 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0
## [456] 0 0 0 1 1 1 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 1 1 1 1
## [491] 0 0 0 1 1 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1
## [526] 1 1 0 0 0 0 1 0 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 1
DT::datatable(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter(pvalue_h2_FDR > 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_h2_FDR > 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter( pvalue_c2_FDR <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_c2_FDR > 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_h2_FDR <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(round_df(allphewas_both %>% filter( pvalue_c2_FDR <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter(pvalue_h2_bonferroni > 0.05) %>% tally() / 561)
DT::datatable(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_h2_bonferroni > 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter( pvalue_c2_bonferroni <= 0.05) %>% tally())
DT::datatable(allphewas_both %>% filter(pvalue_c2_bonferroni > 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_h2_bonferroni <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(round_df(allphewas_both %>% filter( pvalue_c2_bonferroni <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(allphewas_both %>% filter( pvalue_beta_age_FDR <= 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_beta_age_FDR <= 0.05) %>% tally() / 561, digits=3))
DT::datatable(allphewas_both %>% filter( pvalue_beta_gender_FDR <= 0.05) %>% tally())
DT::datatable(round_df(allphewas_both %>% filter( pvalue_beta_gender_FDR <= 0.05) %>% tally() / 561, digits=3))
h2/c2/e2 for all phenotypes
phenotypes_h2_c2_e2 <- allphewas_both %>% select(phewas_code, phewas_description, h2_liab, h2_liab_SE, c2_liab, c2_liab_SE, e2, e2_SE) %>%
mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE ) %>%
select(phewas_code, phewas_description, h2_liab,h2_low,h2_high, c2_liab, c2_low,c2_high, e2, e2_low,e2_high)
DT::datatable(round_df(phenotypes_h2_c2_e2, digits=3))
h2/c2/e2 for all phenotypes with p-value
phenotypes_h2_c2_e2_pvalue <- allphewas_both %>% select(phewas_code, phewas_description, h2_liab, h2_liab_SE,
c2_liab, c2_liab_SE, e2, e2_SE,
pvalue_h2_FDR, h2_liab.zscore, pvalue_c2_FDR, c2_liab.zscore, e2.pvalue_FDR, e2.zscore, rliabos, rliabos_SE, rliabss, rliabss_SE) %>%
mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
mutate(e2_low=e2-1.96*e2_SE,e2_high=e2+1.96*e2_SE ) %>%
mutate(rliabos_low = rliabos - 1.96* rliabos_SE, rliabos_high = rliabos + 1.96* rliabos_SE ) %>%
mutate(rliabss_low = rliabss - 1.96* rliabss_SE, rliabss_high = rliabss + 1.96* rliabss_SE) %>%
select(phewas_code, phewas_description, h2_liab,h2_low,h2_high, pvalue_h2_FDR, h2_liab.zscore,
c2_liab, c2_low,c2_high,pvalue_c2_FDR, c2_liab.zscore,
e2, e2_low,e2_high, e2.pvalue_FDR, e2.zscore,
rliabos, rliabos_low, rliabos_high, rliabss, rliabss_low, rliabss_high)
DT::datatable(round_df(phenotypes_h2_c2_e2_pvalue, digits = 3))
twin_h2_c2_comparison_diff_twin_match <- readRDS(file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/MaTCHComparisonAnalysis/twin_h2_c2_comparison_diff_twin_match.rds')
cor.test(twin_h2_c2_comparison_diff_twin_match$h2_liab, twin_h2_c2_comparison_diff_twin_match$h2_match,method = 'pearson')
##
## Pearson's product-moment correlation
##
## data: twin_h2_c2_comparison_diff_twin_match$h2_liab and twin_h2_c2_comparison_diff_twin_match$h2_match
## t = 4.633, df = 79, p-value = 1.399e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2712569 0.6181865
## sample estimates:
## cor
## 0.4622291
m <- as.matrix(twin_h2_c2_comparison_diff_twin_match %>% select(h2_liab,h2_liab_SE, h2_match, h2_match_SE))
estimateCorrelation_matrix(1:nrow(m),m)
## [1] 0.8168958
results <- jackknife(1:nrow(m),estimateCorrelation_matrix,m)
results$jack.se
## [1] 0.1652211
results$jack.bias
## [1] 0.06142777
#ggplot(twin_h2_c2_comparison_diff_twin_match , aes(h2_match,h2_liab)) + geom_point() +
# geom_errorbarh(aes(xmin=h2_match-1.96*h2_match_SE, xmax=h2_match+1.96*h2_match_SE),
# position = position_dodge(width = 0.01), alpha=.2) +
# geom_errorbar(aes(ymin=h2_liab-1.96*h2_liab_SE, ymax=h2_liab+1.96*h2_liab_SE),
# position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
# geom_smooth(method = "lm", se = TRUE) + xlab('Heritability - Published Studies') + ylab('Heritability - Insurance #Study')
twin_h2_c2_comparison_diff_twin_match %>%
mutate(diff=h2_match-h2_liab) %>%
mutate(higherEstimate=ifelse(diff >=0, 'Published Is Higher', 'Claims is Higher')) %>%
group_by(higherEstimate) %>%
tally()
## # A tibble: 2 x 2
## higherEstimate n
## <chr> <int>
## 1 Claims is Higher 32
## 2 Published Is Higher 49
twinComparison_insurance_published <- twin_h2_c2_comparison_diff_twin_match %>%
select(-subchapter, -diff) %>%
select(subchapterName,h2_insurance=h2_liab,
h2_insurance_SE=h2_liab_SE, h2_published=h2_match,
h2_published_SE=h2_match_SE,source, method)
DT::datatable(twinComparison_insurance_published)
#library(googlesheets)
#boring_ss <- gs_new("published_estimate_comparison",
# ws_title = "published_estimate_comparison",
# input = round_df(twinComparison_insurance_published,digits = 3),
# trim = TRUE, verbose = FALSE)
library(readr)
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:DescTools':
##
## %like%
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
allphewas_binary <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twin_binary.csv",
" ", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## X2 = col_integer(),
## X3 = col_integer(),
## X4 = col_integer(),
## X5 = col_integer(),
## X6 = col_integer(),
## X7 = col_integer(),
## X8 = col_integer(),
## X9 = col_integer(),
## X57 = col_integer(),
## X58 = col_integer(),
## X59 = col_integer(),
## X60 = col_integer(),
## X61 = col_integer()
## )
## See spec(...) for full column specifications.
names(allphewas_binary) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/names_binary.rds')
phewas <- read.table("~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/phewasAllDescription.csv", header=TRUE)
allphewas_binary <- allphewas_binary %>%
mutate(h2_liab_SE_se = ifelse(phewas_code == '984',h2_SE,h2_liab_SE_se)) %>%
mutate(rliabss_SE_se = ifelse(phewas_code == '984',r_SS_SE,rliabss_SE_se)) %>%
mutate(c2_liab_SE_se = ifelse(phewas_code == '984',c2_SE,c2_liab_SE_se)) %>%
mutate(rliabos_SE_se = ifelse(phewas_code == '984',r_OS_SE,rliabos_SE_se))
names(allphewas_binary) <- sub("_se", "", names(allphewas_binary))
allphewas_binary <- allphewas_binary %>% inner_join(phewas, ., by='phewas_code')
## Warning: Column `phewas_code` joining factor and character vector, coercing
## into character vector
allphewas_sibling <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/allphewas_sibling.txt",
" ", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## X2 = col_integer(),
## X3 = col_integer(),
## X4 = col_integer(),
## X5 = col_integer(),
## X6 = col_integer(),
## X31 = col_character()
## )
## See spec(...) for full column specifications.
names(allphewas_sibling) <- readRDS('~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/data/names_binary_sibling.rds')
names(allphewas_sibling) <- sub("_se", "", names(allphewas_sibling))
df1 <- allphewas_sibling %>% select(phewas_code, genderPairType,rliabsibling ) %>% spread(genderPairType,rliabsibling) %>%
select(phewas_code, rliabsibling_SS=SS,rliabsibling_OS=OS)
df2 <- allphewas_sibling %>% select(phewas_code, genderPairType,rliabsibling_SE) %>% spread(genderPairType,rliabsibling_SE) %>%
select(phewas_code, rliabsibling_SE_SS=SS,rliabsibling_SE_OS=OS)
allphewas_sibling_correlation <- df1 %>% inner_join(.,df2, by='phewas_code')
allphewas_binary_correlation <- allphewas_binary %>% select(phewas_code, phewas_description, rliabss, rliabss_SE,rliabos, rliabos_SE, h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)
allphewas_all_correlation_sibling_twin <- allphewas_binary_correlation %>% left_join(.,allphewas_sibling_correlation, by='phewas_code')
rm(df1,df2)
correlation_twinOS_sibOS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_OS)) + geom_point() +
geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
position = position_dodge(width = 0.01), alpha=.2) +
geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
xlab('Twin Opposite Sex Correlation') +
ylab('Sibling Opposite Sex Correlation')
correlation_twinOS_sibOS
## Warning: position_dodge requires non-overlapping x intervals
ggsave(correlation_twinOS_sibOS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_twinOS_sibOS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals
DT::datatable(round_df(allphewas_all_correlation_sibling_twin,digits = 3))
correlation_twinOS_sibSS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_SS)) + geom_point() +
geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
position = position_dodge(width = 0.01), alpha=.2) +
geom_errorbar(aes(ymin=rliabsibling_SS-1.96*rliabsibling_SE_SS, ymax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
xlab('Twin Opposite Sex Correlation') +
ylab('Sibling Same Sex Correlation')
correlation_twinOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals
ggsave(correlation_twinOS_sibSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_twinOS_sibSS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals
correlation_sibOS_sibSS <- ggplot(allphewas_sibling_correlation , aes(rliabsibling_SS,rliabsibling_OS)) + geom_point() +
geom_errorbarh(aes(xmin=rliabsibling_SS-1.96*rliabsibling_SE_SS, xmax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
position = position_dodge(width = 0.01), alpha=.2) +
geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
xlab('Sibling Same Sex Correlation') + ylab('Sibling Opposite Sex Correlation') +
geom_smooth(method = "lm", se = TRUE) +
theme(axis.title.x = element_text(size=12),
axis.title.y = element_text(size=12))
correlation_sibOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals
ggsave(correlation_sibOS_sibSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/correlation_sibOS_sibSS.png')
## Saving 7 x 5 in image
## Warning: position_dodge requires non-overlapping x intervals
allphewas_sibling_correlation_dff <- allphewas_sibling_correlation %>%
mutate(rsib_diff=rliabsibling_SS-rliabsibling_OS)
diff_sib_corOS_corSS <- ggplot(allphewas_sibling_correlation_dff, aes(rsib_diff)) +
geom_histogram(bins = 60) + xlab("Same Sex Correlation - Opposite Sex Correlation")
diff_sib_corOS_corSS
ggsave(diff_sib_corOS_corSS, file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/diff_sib_corOS_corSS.png')
## Saving 7 x 5 in image
allphewas_sibling_correlation <- allphewas_sibling_correlation %>%
mutate(rsib_diff = rliabsibling_SS-rliabsibling_OS) %>%
mutate(rsib_diff_SE = sqrt(rliabsibling_SE_SS^2+rliabsibling_SE_OS^2)) %>%
mutate(rsib_diff_low = rsib_diff - 1.96*rsib_diff_SE,
rsib_diff_high = rsib_diff + 1.96*rsib_diff_SE) %>%
mutate(rsib_diff.zscore=rsib_diff/rsib_diff_SE) %>%
mutate(rsib_diff_SE.pvalue=2*pnorm(-abs(rsib_diff.zscore))) %>%
mutate(rsib_diff.pvalue_FDR = p.adjust(rsib_diff_SE.pvalue, method='BY'))
DT::datatable(allphewas_sibling_correlation %>% filter(rsib_diff_low < 0 & rsib_diff_high > 0) %>% tally())
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff_low < 0 & rsib_diff_high > 0) %>% tally()/551,digits = 3))
allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()/561
## n
## 1 0.3850267
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()
, digits = 3))
DT::datatable(round_df(allphewas_sibling_correlation %>% filter(rsib_diff.pvalue_FDR <= 0.05) %>% tally()/561
, digits = 3))
Pi_0 Statistics
qobj_sib <- qvalue(allphewas_sibling_correlation$rsib_diff_SE.pvalue)
summary(qobj_sib)
##
## Call:
## qvalue(p = allphewas_sibling_correlation$rsib_diff_SE.pvalue)
##
## pi0: 0.2363052
##
## Cumulative number of significant calls:
##
## <1e-04 <0.001 <0.01 <0.025 <0.05 <0.1 <1
## p-value 152 190 271 302 336 361 551
## q-value 152 199 298 346 386 442 551
## local FDR 128 152 207 246 276 316 551
AverageEstimate(allphewas_sibling_correlation$rsib_diff_SE.pvalue )
## $pi0
## [1] 0.2814059
##
## $significant
## [1] 0 1 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0
## [36] 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
## [71] 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0 0 0 0
## [106] 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [141] 0 1 0 1 1 1 0 1 0 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0
## [176] 0 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1
## [211] 0 0 0 0 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0
## [246] 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 1 1 0 0 1 0 1 1 0 0 0
## [281] 0 1 1 1 0 0 0 1 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 1 1 0 1 1 1 0 0
## [316] 1 0 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 0 1 1 0 1 0 1 1 1 1 0 0 0 0 1
## [351] 0 1 1 1 1 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0
## [386] 1 0 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1
## [421] 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0
## [456] 0 0 1 1 1 0 1 1 1 1 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1
## [491] 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1
## [526] 0 1 1 1 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 1 0 1 1 0 0 0
allphewas_both_functional_domain_sibling <- allphewas_all_correlation_sibling_twin %>%
inner_join(.,phewas_functional_domain_map, by=c('phewas_code', 'phewas_description'))
## Warning: Column `phewas_description` joining factor and character vector,
## coercing into character vector
zz <- allphewas_both_functional_domain_sibling %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabsibling_SS,sei=rliabsibling_SE_SS,data=., method='DL'))
phewas_b_rsib_SS <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabsibling_SS,1:ncol(.))
phewas_se_rsib_SS <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabsibling_SE_SS,1:ncol(.))
phewas_meta_rsib_SS_liab_all <- phewas_b_rsib_SS %>% inner_join(.,phewas_se_rsib_SS, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_rsib_SS_liab_all) <- c('functional_domain','rsibSS','rsibSS_SE', 'category')
ribSSOverall <- phewas_meta_rsib_SS_liab_all %>%
mutate(rsibSS_low=rsibSS-1.96*rsibSS_SE, rsibSS_high=rsibSS+1.96*rsibSS_SE )
DT::datatable(round_df(ribSSOverall, digits=3))
#MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')
#names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')
#metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2)
#metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
# mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )
#DT::datatable(round_df(metaAll_justMatch_h2, digits=3))
#rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)
zz <- allphewas_both_functional_domain_sibling %>%
filter(phewas_code != 'COST') %>%
filter(phewas_code != 'CNT') %>%
split(.$functional_domain) %>% purrr::map(~rma(yi=rliabsibling_OS,sei=rliabsibling_SE_OS,data=., method='DL'))
phewas_b_rsib_OS <- zz %>% purrr::map(summary) %>% map_df(~.$b[1]) %>% gather(functional_domain, rliabsibling_OS,1:ncol(.))
phewas_se_rsib_OS <- zz %>% purrr::map(summary) %>% map_df("se") %>% gather(functional_domain, rliabsibling_SE_OS,1:ncol(.))
phewas_meta_rsib_OS_liab_all <- phewas_b_rsib_OS %>% inner_join(.,phewas_se_rsib_OS, by='functional_domain') %>% mutate(category = 'Insurance Claims')
names(phewas_meta_rsib_OS_liab_all) <- c('functional_domain','rsibOS','rsibOS_SE', 'category')
ribOSOverall <- phewas_meta_rsib_OS_liab_all %>%
mutate(rsibOS_low=rsibOS-1.96*rsibOS_SE, rsibOS_high=rsibOS+1.96*rsibOS_SE )
DT::datatable(round_df(ribOSOverall, digits=3))
#MaTCH_h2 <- functional_domain_df %>% select(functional_domain, h2_corr, h2_corr_SE) %>% mutate(category = 'MaTCH')
#names(MaTCH_h2) <- c('functional_domain','h2','h2_SE', 'category')
#metaAll_justMatch_h2 <- phewas_meta_h2_liab_all %>% rbind(.,MaTCH_h2)
#metaAll_justMatch_h2 <- metaAll_justMatch_h2 %>%
# mutate(h2_low = h2-1.96*h2_SE, h2_high = h2+1.96*h2_SE )
#DT::datatable(round_df(metaAll_justMatch_h2, digits=3))
#rm(zz, phewas_b_h2, phewas_se_h2, MaTCH_h2)
phewas_meta_h2_liab_all$stat <- 'h2'
phewas_meta_c2_liab_all$stat <- 'c2'
#phewas_meta_e2_liab_all$stat <- 'e2'
phewas_meta_rss_liab_all$stat <- 'rtwinOS'
phewas_meta_ros_liab_all$stat <- 'rtwinSS'
phewas_meta_rsib_SS_liab_all$stat <- 'rsibSS'
phewas_meta_rsib_OS_liab_all$stat <- 'rsibOS'
a <- phewas_meta_h2_liab_all %>% select(functional_domain, statistic = h2, se=h2_SE, stat)
b <- phewas_meta_c2_liab_all %>% select(functional_domain, statistic = c2,se=c2_SE, stat)
#c <- phewas_meta_e2_liab_all %>% select(functional_domain, statistic = e2, stat)
d <- phewas_meta_ros_liab_all %>% select(functional_domain, statistic = rOS,se=rOS_SE, stat)
e <- phewas_meta_rss_liab_all %>% select(functional_domain, statistic = rSS,se=rSS_SE, stat)
f <- phewas_meta_rsib_SS_liab_all %>% select(functional_domain, statistic = rsibSS, se=rsibSS_SE, stat)
g <- phewas_meta_rsib_OS_liab_all %>% select(functional_domain, statistic = rsibOS,se=rsibOS_SE, stat)
z_meta_all <- a %>% bind_rows(.,b,d,e,f,g)
rm(a,b,c,d,e,g,f)
## Warning in rm(a, b, c, d, e, g, f): object 'c' not found
z_meta_all$stat <- factor(z_meta_all$stat, levels=c('rtwinOS','rsibOS','rtwinSS','rsibSS', 'h2','c2'))
z_meta_all$functional_domain <- factor(z_meta_all$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal',
'Quantitative',
'Avg. Monthly Cost',
'PheWAS Comorbids'))
barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
geom_bar(stat="identity", position="dodge", width = 0.75, colour="black") +
theme(axis.text.x=element_text(angle=45, size = 8),
axis.title.x = element_text(vjust=10)) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
guides(colour = guide_legend(nrow = 1))
barchart_fn_domain
ggsave(barchart_fn_domain, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/barchart_fn_domain.png')
## Saving 7 x 5 in image
braden_df_meta_analytic_estimates_noenvironment <- z_meta_all
allData_meta_all <- allphewas_both %>%
mutate(diff_rss_ros = rliabss-rliabos) %>% mutate(h2_upper = 2*rliabos) %>%
mutate(rOS=rliabos, rSS = rliabss)
scatter_h2_c2 <- ggplot(allData_meta_all, aes(h2, c2)) +
geom_point(aes(colour = c('#008FD5'))) +
xlab('h2') +
ylab('c2') +
theme(legend.position="none") +
geom_text(aes(label=ifelse(phewas_code == '984' | phewas_code == '313.1' |
phewas_code == '134577' |
phewas_code == '495' |
phewas_code == '637' |
phewas_code == '464'
,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)
scatter_h2_c2
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
scatter_h2_c2 <- ggMarginal(scatter_h2_c2)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
scatter_h2_c2
ggsave(scatter_h2_c2, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_h2_c2.png', width = 5, height = 3)
#scatter_h2_e2 <- ggplot(allData_meta_all, aes(h2, e2)) +
# geom_point(aes(colour = c('#008FD5'))) +
# xlab('h2') +
# ylab('e2') +
# theme(legend.position="none") +
# geom_text(aes(label=ifelse(phewas_code == '313.1' | phewas_code == '134577' | phewas_code == '495' |
# phewas_code == '871.1' | phewas_code == '800.3' | phewas_code =='726.3'
# ,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)
#scatter_h2_e2 <- ggMarginal(scatter_h2_e2)
scatter_h2_h2_upper <- ggplot(allData_meta_all, aes(h2, h2_upper)) +
geom_point(aes(colour = c('#008FD5'))) +
xlab('h2') +
ylab('h2_upper (2*rOS)') +
theme(legend.position="none") +
geom_text(aes(label=ifelse(phewas_code == '984' | phewas_code == '313.1' |
phewas_code == '134577' |
phewas_code == '495' |
phewas_code == '637' |
phewas_code == '464'
,shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2)
scatter_h2_h2_upper <- ggMarginal(scatter_h2_h2_upper)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
scatter_ros_rss <- ggplot(allData_meta_all, aes(rOS, rSS)) +
geom_point(aes(colour = c('#008FD5'))) +
xlab('rtwinOS') +
ylab('rtwinSS') +
theme(legend.position="none") +
geom_text(aes(label=ifelse(phewas_code == '362.1' | phewas_code == '464' |
phewas_code == '134577' | phewas_code == '696.4' | phewas_code == '313.1' |
phewas_code == '871.1',
shortName,'')),hjust=0,vjust=0,fontface = "bold", size = 2, nudge_x=-.01)
scatter_ros_rss <- ggMarginal(scatter_ros_rss)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text).
ggsave(scatter_ros_rss, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_ros_rss.png',width = 5, height = 3)
scatter_h2_c2_ros_rss <- ggarrange(scatter_h2_c2,scatter_h2_h2_upper ,scatter_ros_rss,
labels = c("a)", "b)", "c)"),
ncol = 2, nrow = 2)
scatter_h2_c2_ros_rss
ggsave(scatter_h2_c2_ros_rss, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/scatter_h2_c2_ros_rss.png')
## Saving 7 x 5 in image
cost_comorbid_pheno <- allphewas_both %>%
dplyr::filter(phewas_code == 'COST' | phewas_code == 'CNT')
cost_comorbid_pheno$Description <- c('Number PheWAS Comorbidities','Avg. Monthly Cost')
cost_comorbid_pheno <- cost_comorbid_pheno %>% select(phewas_code, Description, rss01=rliabss,rliabss_SE, ros01=rliabos,rliabos_SE,h2_liab, h2_liab_SE, c2_liab, c2_liab_SE)
cost_comorbid_pheno_rss <- cost_comorbid_pheno[,c('phewas_code', 'Description','rss01','rliabss_SE')] %>%
mutate(statisticType = 'rSS')
cost_comorbid_pheno_ros <- cost_comorbid_pheno %>%
select(phewas_code, Description,ros01,rliabos_SE) %>%
mutate(statisticType = 'rOS')
cost_comorbid_pheno_h2 <- cost_comorbid_pheno %>%
select(phewas_code, Description,h2_liab,h2_liab_SE) %>%
mutate(statisticType = 'h2')
cost_comorbid_pheno_c2 <- cost_comorbid_pheno %>%
select(phewas_code, Description,c2_liab,c2_liab_SE) %>%
mutate(statisticType = 'c2')
names(cost_comorbid_pheno_rss) <- c('phewas_code','Description','statistic','standard_error','statisticType' )
names(cost_comorbid_pheno_ros) <- c('phewas_code','Description','statistic','standard_error','statisticType')
names(cost_comorbid_pheno_h2) <- c('phewas_code','Description','statistic','standard_error','statisticType')
names(cost_comorbid_pheno_c2) <- c('phewas_code','Description','statistic','standard_error','statisticType')
cost_comorbid_pheno_long <- cost_comorbid_pheno_rss %>%
rbind(.,cost_comorbid_pheno_ros) %>%
rbind(.,cost_comorbid_pheno_h2) %>%
rbind(.,cost_comorbid_pheno_c2)
cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(y=statistic, x=as.factor(Description), colour=statisticType)) +
geom_point(position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin=statistic-1.96*standard_error, ymax=statistic+1.96*standard_error),position = position_dodge(width = 0.5)) +
theme(axis.text=element_text(size=8)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
theme(axis.title = element_text()) + ylab('Statistic') + xlab('Phenotype') + labs(color='Statistic Type')
cost_comorbid_ggplot
ggsave(cost_comorbid_ggplot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/cost_comorbid_ggplot.png')
## Saving 7 x 5 in image
costDistribution <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/costDistribution.csv")
## Parsed with column specification:
## cols(
## cost = col_double()
## )
comorbidDistribution <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/comorbidDistribution.csv")
## Parsed with column specification:
## cols(
## comorbids = col_integer()
## )
costggplotCDF <- ggplot(costDistribution %>% filter(cost >=0), aes(cost )) + stat_ecdf(geom = "step", pad = 'FALSE') +
ylab('Percentage of Twins') + xlab('Average Monthly Cost')
comorbidggplotCDF <- ggplot(comorbidDistribution, aes(comorbids )) + stat_ecdf(geom = "step", pad = 'FALSE') +
ylab('Percentage of Twins') + xlab('Number of Comorbids')
cdf_cost_comorbid <- ggarrange(costggplotCDF,comorbidggplotCDF,labels = c( "a)", 'b)'))
cdf_cost_comorbid_bar_chart <- ggarrange(cdf_cost_comorbid,cost_comorbid_ggplot,labels = c( "c)", ''), nrow=2)
cdf_cost_comorbid_bar_chart
ggsave(cdf_cost_comorbid_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/cdf_cost_comorbid_bar_chart.png', height = 8, width=8)
cost_comorbid_pheno_table <- cost_comorbid_pheno %>%
mutate(rss_low=rss01-1.96*rliabss_SE,rss_high=rss01+1.96*rliabss_SE ) %>%
mutate(ros_low=ros01-1.96*rliabos_SE,ros_high=ros01+1.96*rliabos_SE ) %>%
mutate(h2_low=h2_liab-1.96*h2_liab_SE,h2_high=h2_liab+1.96*h2_liab_SE ) %>%
mutate(c2_low=c2_liab-1.96*c2_liab_SE,c2_high=c2_liab+1.96*c2_liab_SE ) %>%
select(phewas_code, Description,rss01, rss_low,rss_high, ros01, ros_low, ros_high, h2_liab, h2_low, h2_high,c2_liab, c2_low, c2_high)
DT::datatable(round_df(cost_comorbid_pheno_table, digits=3))
twinPairsYOB <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twinPairsYOB.csv",
"\t", escape_double = FALSE, col_types = cols(phewas_code = col_character()),
trim_ws = TRUE)
twinPairsYOB <- twinPairsYOB %>% inner_join(.,quant_pheno_description, by='phewas_code')
twinPairsYOB <- twinPairsYOB %>% mutate(Description = ifelse(phewas_code == 'COST','Binary Phenotypes', Description))
ggplot_ageDistribution <- ggplot(twinPairsYOB, aes(YOB, fill = Description, colour = Description)) +
geom_density(alpha = 0.1) + facet_wrap(~Description) +
theme(axis.text.x=element_text(angle=45, size = 8),
axis.title.x = element_text(vjust=2)) +
ylab('Density') + xlab('Year of Birth') +
theme(legend.position="none")
ggplot_ageDistribution
ggsave(ggplot_ageDistribution, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/ggplot_ageDistribution.png')
## Saving 7 x 5 in image
thresh_h2 <- allphewas_both_zipcode %>% filter(pvalue_h2_FDR <= 0.05) %>% summarise(val=min(observed_h2))
thresh_c2 <- allphewas_both_zipcode %>% filter(pvalue_c2_FDR <= 0.05) %>% summarise(val=min(observed_c2))
thresh_e2 <- allphewas_both_zipcode %>% filter(e2.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_e2))
thresh_rliabincome <- allphewas_both_zipcode %>%
filter(rliabincome.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabincome))
thresh_rliabaqi <- allphewas_both_zipcode %>%
filter(rliabaqi.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabaqi))
thresh_rliabtemp <- allphewas_both_zipcode %>%
filter(rliabtemp.pvalue_FDR <= 0.05) %>% summarise(val=min(observed_rliabtemp))
fullData_binary_quant <- allphewas_both_zipcode %>%
mutate(twinPrevalenceCategory = as.character(cut(prevalence, breaks=c(0,.10,.30,.80),include.lowest=TRUE,right=FALSE))) %>%
replace_na(list(twinPrevalenceCategory='Quantitative Trait')) %>%
mutate(h2_rank = rank(-h2_liab.zscore ,ties.method='random')) %>%
mutate(c2_rank = rank(-c2_liab.zscore,ties.method='random')) %>%
mutate(e2_rank = rank(-e2.zscore,ties.method='random')) %>%
mutate(rliabincome_rank = rank(-rliabincome.zscore,ties.method='random')) %>%
mutate(rliabaqi_rank = rank(-rliabaqi.zscore,ties.method='random')) %>%
mutate(rliabtemp_rank = rank(-rliabtemp.zscore,ties.method='random')) %>%
mutate(h2_rank_effect = rank(-h2_liab ,ties.method='random')) %>%
mutate(c2_rank_effect = rank(-c2_liab,ties.method='random')) %>%
mutate(e2_rank_effect = rank(-e2,ties.method='random')) %>%
mutate(rliabincome_rank_effect = rank(-rliabincome,ties.method='random')) %>%
mutate(rliabaqi_rank_effect = rank(-rliabaqi,ties.method='random')) %>%
mutate(rliabtemp_rank_effect = rank(-rliabtemp,ties.method='random')) %>%
mutate(h2_label = ifelse(h2_rank <= 5 | h2_rank_effect <= 2,shortName,'')) %>%
mutate(c2_label = ifelse(c2_rank <= 5 |c2_rank_effect <= 2 ,shortName,'')) %>%
mutate(rliabincome_label = ifelse(rliabincome_rank <= 5 | rliabincome_rank_effect <= 2,shortName,'')) %>%
mutate(rliabaqi_label = ifelse(rliabaqi_rank <= 5 | rliabaqi_rank_effect <= 2 ,shortName,'')) %>%
mutate(rliabtemp_label = ifelse(rliabtemp_rank <= 5 | rliabtemp_rank_effect <= 2, shortName,''))
unique(fullData_binary_quant$twinPrevalenceCategory)
## [1] "[0,0.1)" "[0.1,0.3)" "[0.3,0.8]"
## [4] "Quantitative Trait"
fullData_binary_quant$twinPrevalenceCategory <-factor(fullData_binary_quant$twinPrevalenceCategory,
levels = c('[0,0.1)','[0.1,0.3)','[0.3,0.8]','Quantitative Trait'))
alphaLevel <- .6
library(RColorBrewer)
myColors <- c('lightsalmon1','blue','green', 'yellow2')
names(myColors) <- levels(fullData_binary_quant$twinPrevalenceCategory)
p1 <- ggplot(fullData_binary_quant, aes(h2_liab,observed_h2, label=h2_label)) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Heritability') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = 3)
p2 <- ggplot(fullData_binary_quant, aes(c2_liab,observed_c2, label=c2_label)) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Shared Environmental Variance') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = 3)
#p3 <- ggplot(fullData_binary_quant, aes(e2,observed_e2)) +
# geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
# xlab('Residual Variance') + ylab('-log10(p)') +
# geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
# scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
# geom_text(position='jitter',aes(label=ifelse(( e2_rank ==1 | e2_rank == 2 |
# e2_rank ==5 | e2_rank==7 ),shortName,'')),hjust=1,vjust=1,fontface = "bold", size = 3) +
# geom_text(position='jitter',aes(label=ifelse((e2_rank == 3 ),shortName,'')),hjust=0,vjust=1,fontface = "bold", size = 3)
p4 <- ggplot(fullData_binary_quant, aes(rliabincome,observed_rliabincome, label=rliabincome_label) ) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Socioeconomic Status Variance Component') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = 3)
p5 <- ggplot(fullData_binary_quant, aes(rliabaqi,observed_rliabaqi, label=rliabaqi_label) ) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
xlab('Monthly AQI Variance Component') + ylab('-log10(p)') +
geom_text_repel( fontface = "bold", size = 3)
p6 <- ggplot(fullData_binary_quant, aes(rliabtemp,observed_rliabtemp, label=rliabtemp_label) ) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Monthly Temperature Variance Component') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = 3)
volcano_plot <- ggarrange(p1,p2, p4 , p5, p6, labels = c( 'b)', 'c)','d)', 'e)' ,'f)' ), common.legend = TRUE, nrow=3,ncol = 2, legend = 'bottom')
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
#barchart_fn_domain
#volcano_bar_chart <- ggarrange(barchart_fn_domain,volcano_plot, labels=c('a)',''), nrow = 2,heights = 1:2)
#volcano_bar_chart
#ggsave(volcano_bar_chart,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/volcano_bar_chart.png', height = 12, width = 10)
df_tttt <- fullData_binary_quant %>% select(phewas_code, phewas_description, rliabincome,rliabincome_SE, rliabincome.pvalue, rliabincome.pvalue_FDR, rliabincome.zscore, rliabincome_rank)
daily_aqi_by_county_2007 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2007.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2008 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2008.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2009 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2009.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2010 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2010.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2011 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2011.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2012 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2012.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2013 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2013.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2014 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2014.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2015 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2015.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
daily_aqi_by_county_2016 <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/aqi_data/daily_aqi_by_county_2016.csv")
## Parsed with column specification:
## cols(
## `State Name` = col_character(),
## `county Name` = col_character(),
## `State Code` = col_character(),
## `County Code` = col_character(),
## Date = col_date(format = ""),
## AQI = col_integer(),
## Category = col_character(),
## `Defining Parameter` = col_character(),
## `Defining Site` = col_character(),
## `Number of Sites Reporting` = col_integer()
## )
aqi_monthly <- daily_aqi_by_county_2007 %>% bind_rows(.,daily_aqi_by_county_2008,daily_aqi_by_county_2009,
daily_aqi_by_county_2010,daily_aqi_by_county_2011,
daily_aqi_by_county_2012,daily_aqi_by_county_2013,
daily_aqi_by_county_2014,daily_aqi_by_county_2015,
daily_aqi_by_county_2016) %>%
select(state=`State Name`, county=`county Name`, county_code=`County Code`, Date, AQI)
mecklenburg_aqi <- aqi_monthly %>% filter(state=='North Carolina', county == 'Mecklenburg')
mecklenburg_aqi$Date[366]
## [1] "2008-01-01"
as.numeric(mecklenburg_aqi$Date[366])
## [1] 13879
mecklenburg_aqi$Date[2557]
## [1] "2013-12-31"
as.numeric(mecklenburg_aqi$Date[2557])
## [1] 16070
mecklenburg_noaa_sensor <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/var_comp_zipcode_analysis/mecklenburg_noaa_sensor.csv",
col_types = cols(`?column?` = col_double(),
startdate = col_character()))
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 3 parsing failures.
## row # A tibble: 3 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 3 ?colum… a double "\" \"" '~/Dropbox (HMS)/RagGroup Team Folder/ge… file 2 4 ?colum… a double "\" \"" '~/Dropbox (HMS)/RagGroup Team Folder/ge… row 3 87 ?colum… a double "\" \"" '~/Dropbox (HMS)/RagGroup Team Folder/ge…
names(mecklenburg_noaa_sensor) <- c('startdate','Temperature')
mecklenburg_noaa_sensor <- mecklenburg_noaa_sensor %>%
mutate(Date=substring(startdate,0,10)) %>% mutate(Date=parse_datetime(Date, format='%F'))
mecklenburg_noaa_sensor$Date[9]
## [1] "2008-01-01 UTC"
as.numeric(mecklenburg_noaa_sensor$Date[9])
## [1] 1199145600
mecklenburg_noaa_sensor$Date[63]
## [1] "2014-01-01 UTC"
as.numeric(mecklenburg_noaa_sensor$Date[63])
## [1] 1388534400
axis_text <- element_text(face = "bold", color = "black", size = 12)
library(readr)
ACS_13_5YR_S1901_with_ann <- read_csv("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/analysis/var_comp_zipcode_analysis/ACS_13_5YR_S1901/ACS_13_5YR_S1901_with_ann.csv",
skip = 1)
## Parsed with column specification:
## cols(
## .default = col_double(),
## Id = col_character(),
## Id2 = col_integer(),
## Geography = col_character(),
## `Households; Estimate; Total` = col_integer(),
## `Households; Margin of Error; Total` = col_integer(),
## `Families; Estimate; Total` = col_integer(),
## `Families; Margin of Error; Total` = col_integer(),
## `Married-couple families; Estimate; Total` = col_integer(),
## `Married-couple families; Margin of Error; Total` = col_integer(),
## `Nonfamily households; Estimate; Total` = col_integer(),
## `Nonfamily households; Margin of Error; Total` = col_integer(),
## `Households; Estimate; Median income (dollars)` = col_integer(),
## `Households; Margin of Error; Median income (dollars)` = col_integer(),
## `Families; Estimate; Median income (dollars)` = col_integer(),
## `Families; Margin of Error; Median income (dollars)` = col_integer(),
## `Married-couple families; Estimate; Median income (dollars)` = col_integer(),
## `Married-couple families; Margin of Error; Median income (dollars)` = col_integer(),
## `Nonfamily households; Estimate; Median income (dollars)` = col_integer(),
## `Nonfamily households; Margin of Error; Median income (dollars)` = col_integer(),
## `Households; Estimate; Mean income (dollars)` = col_integer()
## # ... with 28 more columns
## )
## See spec(...) for full column specifications.
income_distribution <- ACS_13_5YR_S1901_with_ann %>%
select(Id2, starts_with('Families;')) %>%
select(Id2, contains('Estimate;')) %>%
select(-contains('PERCENT IMPUTED')) %>%
gather(estimate, percentage, 3:12) %>%
separate(estimate,c('fam','est','bracket'), sep = ';') %>% select(-fam,-est) %>%
mutate(bracket = trimws(bracket, which=c('both')))
income_distribution$bracket <- factor(income_distribution$bracket,
levels = c('Less than $10,000',
'$10,000 to $14,999',
'$15,000 to $24,999',
'$25,000 to $34,999',
'$35,000 to $49,999',
'$50,000 to $74,999',
'$75,000 to $99,999',
'$100,000 to $149,999',
'$150,000 to $199,999',
'$200,000 or more'))
myLoc <-
(which(levels(income_distribution$bracket) == '$50,000 to $74,999') +
which(levels(income_distribution$bracket) == '$75,000 to $99,999')) / 2
size_text <- 6
income_plot <- ggplot(income_distribution, aes(x=bracket, y=percentage)) + geom_col(fill='dark green') +
xlab('Income Bracket') + ylab('Percentage') +
geom_vline(aes(xintercept = myLoc), size=2) +
theme(axis.text.x = element_text(angle = 20, size=size_text,hjust = 1),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text))
income_plot
ts_aqi <- ggplot(mecklenburg_aqi, aes(x = Date, y = AQI)) +
geom_line(color='blue') +
xlab('Date') + ylab('Daily AQI') +
geom_vline(xintercept =13879, size=2) +
geom_vline(xintercept =16070, size=2) +
theme(axis.text.x = element_text(angle = 0, size=size_text),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text))
ts_temperature <- ggplot(mecklenburg_noaa_sensor, aes(x = Date, y = Temperature)) +
geom_line(color='red') +
geom_vline(xintercept =as.numeric(mecklenburg_noaa_sensor$Date[63]), size=2) +
geom_vline(xintercept =as.numeric(mecklenburg_noaa_sensor$Date[9]), size=2) +
xlab('Date') + ylab('Average Monthly Temp') +
theme(axis.text.x = element_text(angle = 0, size=size_text),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text))
ts_both <- ggarrange(ts_aqi, ts_temperature, income_plot, labels=c('d)', 'e)', 'f)'), nrow=3,
font.label = list(size = 8))
## Warning: Removed 1 rows containing missing values (geom_path).
ts_both
library(magick)
## Linking to ImageMagick 6.9.9.39
## Enabled features: cairo, fontconfig, freetype, lcms, pango, rsvg, webp
## Disabled features: fftw, ghostscript, x11
meck_diagram <- image_read('~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_grapple_image_v2.png')
meck_diagram_ggplot <- image_ggplot(meck_diagram)
ts_both_picture <- ggarrange(ts_both,meck_diagram_ggplot, labels=c('', 'g)'), ncol=2, font.label = list(size = 8))
#ggsave(ts_temperature,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_ts_noaa.png')
#ggsave(ts_aqi,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_ts_AQI.png')
#ggsave(ts_both_picture, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1b.png')
#ts_both_picture
#ggsave(ts_both, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1b_left.png')
ts_both_picture
#ggsave(ts_both,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/mecklenburg_data.png', height=12, width=8)
map_prevalence_data <- readRDS(all,file = '~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/maps/map_prevalence_data.rds')
pp <- ggplot() +
geom_polygon(data = map_prevalence_data,
aes(x = long, y = lat, group = group, fill = numPairs),
color = "black", size = 0.25) +
coord_map() +
labs(fill = "Number of Twin Pairs") + xlim(-140,-66) + theme_map() +
theme(legend.text=element_text(size=size_text),
legend.title = element_text(size=size_text),
legend.key.size = unit(.25,'cm'))
pp
df_Pop <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/popDensity_twin_ACS.rds')
df_SES <- readRDS(file='~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/revised_analysis/SES_Analysis/SES_twin_ACS.rds')
popDensityPlot <- ggplot(df_Pop, aes(x=densityBin, y=normalizedPop)) +
geom_bar(stat='identity', aes(fill = cohort)) +
facet_wrap(~cohort,nrow = 1) +
theme(axis.text.x = element_text(angle = 60, size=1),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
strip.text = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text)) +
xlab("Log of Population Per Square Mile Area") +
ylab("% Total Population")
sesDensityPlot <- ggplot(df_SES, aes(x=SES_bin, y=normalizedPop)) +
geom_bar(stat='identity', aes(fill = cohort)) +
facet_wrap(~cohort,nrow = 1) +
theme(axis.text.x = element_text(angle = 60, size=1),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
strip.text = element_text(size=size_text)) +
theme(legend.text=element_text(size_text)) +
xlab("Depravity Index Score") + ylab("% Total Population")
figure1a <- ggarrange(popDensityPlot,sesDensityPlot,labels = c( 'b)','c)'),
common.legend=TRUE, ncol = 2, legend = 'none', align = 'hv',
font.label = list(size = 8))
figure1 <- ggarrange(pp,figure1a,labels = c( "a)", ''), common.legend=FALSE, nrow = 2, ncol = 1,legend = 'top',
font.label = list(size = 8))
figure1
figure1_combined <- ggarrange(figure1,ts_both_picture, nrow=2,common.legend = FALSE)
figure1_combined
#save_plot(figure1_combined,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1_combined.png',
# base_aspect_ratio = 1.3 # make room for figure legend
# )
#ggsave(figure1,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1a.png')
#save_plot(ts_both_picture,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1b.png')
ggsave(figure1_combined,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure1.pdf',
height = 8.5, width=6.9, units='in', dpi = 'retina', device = 'pdf')
#ggsave(pp, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/twin_map.png')
#ggsave(popDensityPlot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/popDensityPlot.png')
#ggsave(sesDensityPlot, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/sesDensityPlot.png')
size_text <- 8
barchart_fn_domain <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
geom_bar(stat="identity", position="dodge", width = 0.85, colour="black",size=.15) +
ylab('Statistic Value') + xlab('Functional Domain') + labs(fill='Statistic') +
theme(axis.text.x = element_text(angle = 45, size=size_text, hjust = .9),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(legend.direction = "horizontal",legend.position = c(.35,.85)) +
geom_errorbar(aes(ymin=statistic-1.96*se,
ymax=statistic+1.96*se),
width=.5,size=.2,
position=position_dodge(.9))
barchart_fn_domain
alphaLevel <- .6
library(RColorBrewer)
myColors <- c('lightsalmon1','blue','green', 'yellow2')
names(myColors) <- levels(fullData_binary_quant$twinPrevalenceCategory)
labelSize <- 1.5
p1 <- ggplot(fullData_binary_quant, aes(h2_liab,observed_h2, label=h2_label)) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Heritability') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = labelSize) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
p2 <- ggplot(fullData_binary_quant, aes(c2_liab,observed_c2, label=c2_label)) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Shared Environmental Variance') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = labelSize) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
p4 <- ggplot(fullData_binary_quant, aes(rliabincome,observed_rliabincome, label=rliabincome_label) ) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Socioeconomic Status Variance Component') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = labelSize) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
p5 <- ggplot(fullData_binary_quant, aes(rliabaqi,observed_rliabaqi, label=rliabaqi_label) ) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
xlab('Monthly AQI Variance Component') + ylab('-log10(p)') +
geom_text_repel( fontface = "bold", size = labelSize) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
p6 <- ggplot(fullData_binary_quant, aes(rliabtemp,observed_rliabtemp, label=rliabtemp_label) ) +
geom_point( alpha =alphaLevel, aes(colour = twinPrevalenceCategory)) +
xlab('Monthly Temperature Variance Component') + ylab('-log10(p)') +
geom_hline(yintercept=thresh_h2$val,colour="#BB0000", linetype="dashed") +
scale_colour_manual(name = "Prevalence",values = myColors) + theme(legend.position="none") +
geom_text_repel( fontface = "bold", size = labelSize) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
volcano_plot <- ggarrange(p1,p2, p4 , p5, p6, labels = c( 'b)', 'c)','d)', 'e)' ,'f)' ), common.legend = TRUE, nrow=3,ncol = 2, legend = 'bottom',
font.label = list(size = size_text))
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
barchart_fn_domain
volcano_bar_chart <- ggarrange(barchart_fn_domain,volcano_plot, labels=c('a)',''), nrow = 2,heights = 1:2,font.label = list(size = size_text))
volcano_bar_chart
ggsave(volcano_bar_chart,
file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure2.pdf',
height = 8, width = 6.9, units='in', device = 'pdf', dpi = 'retina')
size_text <- 8
cost_comorbid_pheno_long <- cost_comorbid_pheno_long %>%
mutate(statisticType = ifelse(statisticType == 'rSS', 'rtwinSS',statisticType )) %>%
mutate(statisticType = ifelse(statisticType == 'rOS', 'rtwinOS',statisticType ))
cost_comorbid_ggplot <- ggplot(cost_comorbid_pheno_long, aes(x=as.factor(Description), y=statistic, fill=statisticType)) +
geom_bar(stat="identity", color="black",
position=position_dodge()) +
geom_errorbar(aes(ymin=statistic-1.96*standard_error,
ymax=statistic+1.96*standard_error),
width=.4,
position=position_dodge(.9)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 15)) +
coord_flip() +
theme(axis.title = element_text()) + ylab('Statistic Value') + xlab('Phenotype') + labs(color='Statistic Type') +
theme(legend.position="bottom") +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text))
cost_comorbid_ggplot
comparisonplot_publishedLit <- ggplot(twin_h2_c2_comparison_diff_twin_match , aes(h2_match,h2_liab)) + geom_point() +
geom_errorbarh(aes(xmin=h2_match-1.96*h2_match_SE, xmax=h2_match+1.96*h2_match_SE),
position = position_dodge(width = 0.01), alpha=.2) +
geom_errorbar(aes(ymin=h2_liab-1.96*h2_liab_SE, ymax=h2_liab+1.96*h2_liab_SE),
position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
geom_smooth(method = "lm", se = TRUE) + xlab('Heritability - Published Studies') + ylab('Heritability - Insurance Study') +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 0, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text, vjust=.1),
axis.title.y = element_text(size=size_text))
figure3 <- ggarrange(comparisonplot_publishedLit,cost_comorbid_ggplot,labels = c( "a)", 'b)'), nrow=2,ncol = 1,font.label = list(size = size_text))
## Warning: position_dodge requires non-overlapping x intervals
figure3
ggsave(figure3,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure3.pdf',
height = 7, width = 6.9, device = 'pdf', dpi = 'retina')
size_text <- 8
h2_liab_ggplot <- ggplot(metaAll_justMatch_h2, aes(y=h2, x=functional_domain, colour=category)) +
geom_point(position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin=h2-1.96*h2_SE, ymax=h2+1.96*h2_SE),position = position_dodge(width = 0.5)) +
coord_flip() +
theme(axis.text=element_text(size=size_text)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
theme(axis.title = element_text(size=size_text)) + ylab('Heritability (h2)') + xlab('Functional Domain') +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
#ggsave(h2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/h2_liab_comparison_match.png', height=3.5, width=6)
c2_liab_ggplot <- ggplot(metaAll_justMatch_c2, aes(y=c2, x=functional_domain, colour=category)) +
geom_point(position = position_dodge(width = .5)) +
geom_errorbar(aes(ymin=c2-1.96*c2_SE, ymax=c2+1.96*c2_SE),position = position_dodge(width = 0.5)) +
coord_flip() +
theme(axis.text=element_text(size=size_text)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 80)) +
theme(axis.title = element_text(size=size_text)) + ylab('Shared Environmental Variance (c2)') + xlab('Functional Domain') + theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text))
#ggsave(c2_liab_ggplot,file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/c2_liab_comparison_match.png', height=3.5, width=6)
figure4 <- ggarrange(h2_liab_ggplot,c2_liab_ggplot,labels = c( "a)", 'b)'), nrow = 2, ncol = 1, common.legend=TRUE,legend='bottom',font.label = list(size = size_text) )
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_errorbar).
#h2_c2_MaTCH
figure4
ggsave(figure4,
file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/figure4.pdf',
height = 6, width=6, units = 'in', device='pdf', dpi='retina')
size_text <- 3
plot_fig <- ggplot(prev, aes(x=functional_domain, y=prevalence)) +
geom_boxplot(aes(colour=category), outlier.alpha = 0.4,size=.2,outlier.size = .3) +
scale_y_log10() +
ylab('Prevalence') + xlab('Functional Domain') +
theme(legend.position=c(-.15,-.1),legend.box = "horizontal") + theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(legend.key.size = unit(.3, "cm")) +
theme(plot.margin=grid::unit(c(t=.2,r=.1,b=-1,l=0), "mm"))
plot_fig <- ggplot(prev, aes(x=functional_domain, y=prevalence)) +
geom_boxplot(aes(colour=category), outlier.alpha = 0.4,size=.2,outlier.size = .3) +
scale_y_log10() +
ylab('Prevalence') + xlab('Functional Domain') +
theme(legend.position=c(-.15,-.1),legend.box = "horizontal") +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(legend.key.size = unit(.3, "cm")) +
theme(plot.margin=grid::unit(c(t=.2,r=.1,b=-1,l=0), "mm"))
plot_fig
ggsave(plot_fig, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_1.jpg',dpi='print' , device = 'jpeg', width = 3,height= 2)
size_text <- 6
correlation_sibOS_sibSS <- ggplot(allphewas_sibling_correlation , aes(rliabsibling_SS,rliabsibling_OS)) + geom_point(size=.03) +
geom_errorbarh(aes(xmin=rliabsibling_SS-1.96*rliabsibling_SE_SS, xmax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
position = position_dodge(width = 0.01), alpha=.2, size=.2) +
geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
position = position_dodge(width = 0.01), alpha=.2, size=.2) +
geom_abline(slope = 1, intercept = 0) +
xlab('Sibling Same Sex Correlation') + ylab('Sibling Opposite Sex Correlation') +
geom_smooth(method = "lm", se = TRUE) +
theme(axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
axis.text.y = element_text(size=size_text),
axis.text.x = element_text(size=size_text)
)
correlation_sibOS_sibSS
## Warning: position_dodge requires non-overlapping x intervals
ggsave(correlation_sibOS_sibSS, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_2.jpg',dpi='print' , device = 'jpeg', width = 3,height= 2)
## Warning: position_dodge requires non-overlapping x intervals
allphewas_sibling_correlation_dff <- allphewas_sibling_correlation %>%
mutate(rsib_diff=rliabsibling_SS-rliabsibling_OS)
diff_sib_corOS_corSS <- ggplot(allphewas_sibling_correlation_dff, aes(rsib_diff)) +
geom_histogram(bins = 60) + xlab("Same Sex Correlation - Opposite Sex Correlation") +
theme(axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
axis.text.y = element_text(size=size_text),
axis.text.x = element_text(size=size_text)
)
diff_sib_corOS_corSS
ggsave(diff_sib_corOS_corSS, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_3.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
size_text <- 4
correlation_twinOS_sibOS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_OS)) + geom_point(size=.03) +
geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
position = position_dodge(width = 0.01), alpha=.2, size=.2) +
geom_errorbar(aes(ymin=rliabsibling_OS-1.96*rliabsibling_SE_OS, ymax=rliabsibling_OS+1.96*rliabsibling_SE_OS),
position = position_dodge(width = 0.01), alpha=.2, size=.2) + geom_abline(slope = 1, intercept = 0) +
xlab('Twin Opposite Sex Correlation') +
ylab('Sibling Opposite Sex Correlation') +
theme(axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
axis.text.y = element_text(size=size_text),
axis.text.x = element_text(size=size_text)
)
correlation_twinOS_sibSS <- ggplot(allphewas_all_correlation_sibling_twin , aes(rliabos,rliabsibling_SS)) + geom_point(size=.03) +
geom_errorbarh(aes(xmin=rliabos-1.96*rliabos_SE, xmax=rliabos+1.96*rliabos_SE),
position = position_dodge(width = 0.01), alpha=.2) +
geom_errorbar(aes(ymin=rliabsibling_SS-1.96*rliabsibling_SE_SS, ymax=rliabsibling_SS+1.96*rliabsibling_SE_SS),
position = position_dodge(width = 0.01), alpha=.2) + geom_abline(slope = 1, intercept = 0) +
xlab('Twin Opposite Sex Correlation') +
ylab('Sibling Same Sex Correlation') +
theme(axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
axis.text.y = element_text(size=size_text),
axis.text.x = element_text(size=size_text)
)
correlation_twinOS_sibSS_sibOS <- ggarrange(correlation_twinOS_sibOS,correlation_twinOS_sibSS,labels = c( 'a)','b)'),
ncol = 2,
font.label = list(size = size_text))
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
ggsave(correlation_twinOS_sibSS_sibOS, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_4.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
correlation_twinOS_sibSS_sibOS
size_text <- 4
barchart_fn_domain_environment <- ggplot(z_meta_all, aes(functional_domain, statistic, fill=stat)) +
geom_bar(stat="identity", position="dodge", width = 0.75, colour="black", size=.2) +
theme(legend.position=c(.5,.75),legend.direction = "horizontal") +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text), legend.title = element_text(size=size_text)) +
theme(legend.key.size = unit(.2, "cm"))
barchart_fn_domain_environment
ggsave(barchart_fn_domain_environment, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_5.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
size_text <- 4
twinPairsYOB <- read_delim("~/Dropbox (HMS)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/manuscript_data/twinPairsYOB.csv",
"\t", escape_double = FALSE, col_types = cols(phewas_code = col_character()),
trim_ws = TRUE)
twinPairsYOB <- twinPairsYOB %>% inner_join(.,quant_pheno_description, by='phewas_code')
twinPairsYOB <- twinPairsYOB %>% mutate(Description = ifelse(phewas_code == 'COST','Binary Phenotypes', Description))
ggplot_ageDistribution <- ggplot(twinPairsYOB, aes(YOB, fill = Description, colour = Description)) +
geom_density(alpha = 0.1) + facet_wrap(~Description) +
theme(axis.text.x=element_text(angle=45, size = 8),
axis.title.x = element_text(vjust=2)) +
ylab('Density') + xlab('Year of Birth') +
theme(legend.position="none") +
theme(axis.title.x = element_text(size=size_text),
axis.title.y = element_text(size=size_text),
axis.text.y = element_text(size=size_text),
axis.text.x = element_text(size=size_text),
strip.text = element_text(size=size_text)
)
ggplot_ageDistribution
ggsave(ggplot_ageDistribution, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_7.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
coefficients_age_gender <- allphewas_both_functional_domain %>% dplyr::select(phewas_code,functional_domain,beta_gender=`as.factor(Gender)M`, beta_age=MemberBirthYear) %>%
filter(!is.na(functional_domain))
coefficients_age_gender$functional_domain <- factor(coefficients_age_gender$functional_domain,
levels = c('All Traits',
'Aging',
'Cardiovascular',
'Cognitive',
'Connective tissue',
'Dermatological',
'Developmental',
'Ear,Nose,Throat',
'Endocrine',
'Environment',
'Gastrointestinal',
'Hematological',
'Immunological',
'Infection',
'Metabolic',
'Neoplasms',
'Neurological',
'Ophthalmological',
'Psychiatric',
'Reproduction',
'Respiratory',
'Skeletal'))
coefficients_age_gender <- coefficients_age_gender %>% filter(!is.na(functional_domain))
plot_coefficient_gender <- ggplot(coefficients_age_gender, aes(x=functional_domain, y=beta_gender)) +
geom_boxplot(aes(colour=functional_domain), outlier.alpha = 0.4,size=.2,outlier.size = .3) +
scale_y_log10() +
ylab('Effect Size - Gender') + xlab('Functional Domain') +
theme(legend.position=c(.3,.95),legend.direction = "horizontal") + theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text-2), legend.title = element_text(size=size_text-2)) +
theme(legend.key.size = unit(.1, "cm"))
ggsave(plot_coefficient_gender, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_8.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 624 rows containing non-finite values (stat_boxplot).
plot_coefficient_gender
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 624 rows containing non-finite values (stat_boxplot).
plot_coefficient_age <- ggplot(coefficients_age_gender, aes(x=functional_domain, y=beta_age)) +
geom_boxplot(aes(colour=functional_domain), outlier.alpha = 0.4,size=.2,outlier.size = .3) +
scale_y_log10() +
ylab('Effect Size - Age') + xlab('Functional Domain') +
theme(legend.position=c(.3,.95),legend.direction = "horizontal") +
theme(axis.text=element_text(size=size_text)) +
theme(axis.text.x = element_text(angle = 30, size=size_text, hjust = .75),
axis.text.y = element_text(size=size_text),
axis.title.x = element_text(size=size_text,vjust=4),
axis.title.y = element_text(size=size_text,vjust=-1)) +
theme(legend.text=element_text(size=size_text-2), legend.title = element_text(size=size_text-2)) +
theme(legend.key.size = unit(.1, "cm"))
ggsave(plot_coefficient_age, file='~/Dropbox (RagGroup)/RagGroup Team Folder/geneticLinkageAetna/manuscript_analysis/manuscript_files/figures/supp_figure_9.jpg',dpi='print',
device = 'jpeg', width = 3,height= 2)
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 508 rows containing non-finite values (stat_boxplot).
plot_coefficient_age
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 508 rows containing non-finite values (stat_boxplot).